├── .clang-format ├── .flake8 ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── GETTING_STARTED.md ├── INSTALL.md ├── MODEL_ZOO.md ├── README.md ├── configs ├── Base-RCNN-C4.yaml ├── Base-RCNN-DilatedC5.yaml ├── Base-RCNN-FPN.yaml ├── Base-RetinaNet.yaml ├── COCO-Detection │ ├── fast_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_101_C4_3x.yaml │ ├── faster_rcnn_R_101_DC5_3x.yaml │ ├── faster_rcnn_R_101_FPN_3x.yaml │ ├── faster_rcnn_R_50_C4_1x.yaml │ ├── faster_rcnn_R_50_C4_3x.yaml │ ├── faster_rcnn_R_50_DC5_1x.yaml │ ├── faster_rcnn_R_50_DC5_3x.yaml │ ├── faster_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_50_FPN_3x.yaml │ ├── faster_rcnn_X_101_32x8d_FPN_3x.yaml │ ├── retinanet_R_101_FPN_3x.yaml │ ├── retinanet_R_50_FPN_1x.yaml │ ├── retinanet_R_50_FPN_3x.yaml │ ├── rpn_R_50_C4_1x.yaml │ └── rpn_R_50_FPN_1x.yaml ├── COCO-InstanceSegmentation │ ├── mask_rcnn_R_101_C4_3x.yaml │ ├── mask_rcnn_R_101_DC5_3x.yaml │ ├── mask_rcnn_R_101_FPN_3x.yaml │ ├── mask_rcnn_R_50_C4_1x.yaml │ ├── mask_rcnn_R_50_C4_3x.yaml │ ├── mask_rcnn_R_50_DC5_1x.yaml │ ├── mask_rcnn_R_50_DC5_3x.yaml │ ├── mask_rcnn_R_50_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_3x.yaml │ └── mask_rcnn_X_101_32x8d_FPN_3x.yaml ├── COCO-Keypoints │ ├── Base-Keypoint-RCNN-FPN.yaml │ ├── keypoint_rcnn_R_101_FPN_3x.yaml │ ├── keypoint_rcnn_R_50_FPN_1x.yaml │ ├── keypoint_rcnn_R_50_FPN_3x.yaml │ └── keypoint_rcnn_X_101_32x8d_FPN_3x.yaml ├── COCO-PanopticSegmentation │ ├── Base-Panoptic-FPN.yaml │ ├── panoptic_fpn_R_101_3x.yaml │ ├── panoptic_fpn_R_50_1x.yaml │ └── panoptic_fpn_R_50_3x.yaml ├── Cityscapes │ └── mask_rcnn_R_50_FPN.yaml ├── Detectron1-Comparisons │ ├── README.md │ ├── faster_rcnn_R_50_FPN_noaug_1x.yaml │ ├── keypoint_rcnn_R_50_FPN_1x.yaml │ └── mask_rcnn_R_50_FPN_noaug_1x.yaml ├── LVIS-InstanceSegmentation │ ├── mask_rcnn_R_101_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_1x.yaml │ └── mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── Misc │ ├── cascade_mask_rcnn_R_50_FPN_1x.yaml │ ├── cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml │ ├── mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml │ ├── mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml │ ├── mask_rcnn_R_50_FPN_3x_gn.yaml │ ├── panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml │ ├── scratch_mask_rcnn_R_50_FPN_3x_gn.yaml │ └── semantic_R_50_FPN_1x.yaml ├── PascalVOC-Detection │ └── faster_rcnn_R_50_C4.yaml ├── RefSeg_deeplab_Dilate_1x_graph.yaml ├── WeaklyGrounding-RN101-C4.yaml └── quick_schedules │ ├── README.md │ ├── fast_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── fast_rcnn_R_50_FPN_instant_test.yaml │ ├── keypoint_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── keypoint_rcnn_R_50_FPN_instant_test.yaml │ ├── keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml │ ├── keypoint_rcnn_R_50_FPN_training_acc_test.yaml │ ├── mask_rcnn_R_50_C4_inference_acc_test.yaml │ ├── mask_rcnn_R_50_C4_instant_test.yaml │ ├── mask_rcnn_R_50_C4_training_acc_test.yaml │ ├── mask_rcnn_R_50_DC5_inference_acc_test.yaml │ ├── mask_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── mask_rcnn_R_50_FPN_instant_test.yaml │ ├── mask_rcnn_R_50_FPN_training_acc_test.yaml │ ├── panoptic_fpn_R_50_inference_acc_test.yaml │ ├── panoptic_fpn_R_50_instant_test.yaml │ ├── panoptic_fpn_R_50_training_acc_test.yaml │ ├── retinanet_R_50_FPN_inference_acc_test.yaml │ ├── retinanet_R_50_FPN_instant_test.yaml │ ├── rpn_R_50_FPN_inference_acc_test.yaml │ ├── rpn_R_50_FPN_instant_test.yaml │ ├── semantic_R_50_FPN_inference_acc_test.yaml │ ├── semantic_R_50_FPN_instant_test.yaml │ └── semantic_R_50_FPN_training_acc_test.yaml ├── demo ├── README.md ├── demo.py └── 
predictor.py ├── detectron2 ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── c2_model_loading.py │ ├── detection_checkpoint.py │ └── model_zoo.py ├── config │ ├── __init__.py │ ├── compat.py │ ├── config.py │ └── defaults.py ├── data │ ├── __init__.py │ ├── build.py │ ├── catalog.py │ ├── common.py │ ├── dataset_mapper.py │ ├── datasets │ │ ├── README.md │ │ ├── __init__.py │ │ ├── builtin.py │ │ ├── builtin_meta.py │ │ ├── cityscapes.py │ │ ├── coco.py │ │ ├── flickr30k.py │ │ ├── lvis.py │ │ ├── lvis_v0_5_categories.py │ │ ├── pascal_voc.py │ │ ├── refcoco.py │ │ └── register_coco.py │ ├── detection_utils.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ └── grouped_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── transform.py │ │ └── transform_gen.py ├── engine │ ├── __init__.py │ ├── defaults.py │ ├── hooks.py │ ├── launch.py │ └── train_loop.py ├── evaluation │ ├── __init__.py │ ├── cityscapes_evaluation.py │ ├── coco_evaluation.py │ ├── evaluator.py │ ├── flickr30k_evaluation_grounding.py │ ├── flickr30k_evaluation_grounding_lite.py │ ├── flickr30k_evaluation_grounding_reg.py │ ├── flickr30k_evaluation_grounding_reg_ml.py │ ├── flickr30k_evaluation_grounding_v1.py │ ├── flickr30k_evaluation_kac.py │ ├── lvis_evaluation.py │ ├── panoptic_evaluation.py │ ├── pascal_voc_evaluation.py │ ├── recoco_evaluation_grounding.py │ ├── sem_seg_evaluation.py │ └── testing.py ├── layers │ ├── __init__.py │ ├── batch_norm.py │ ├── csrc │ │ ├── README.md │ │ ├── ROIAlign │ │ │ ├── ROIAlign.h │ │ │ ├── ROIAlign_cpu.cpp │ │ │ └── ROIAlign_cuda.cu │ │ ├── ROIAlignRotated │ │ │ ├── ROIAlignRotated.h │ │ │ ├── ROIAlignRotated_cpu.cpp │ │ │ └── ROIAlignRotated_cuda.cu │ │ ├── box_iou_rotated │ │ │ ├── box_iou_rotated.h │ │ │ ├── box_iou_rotated_cpu.cpp │ │ │ ├── box_iou_rotated_cuda.cu │ │ │ └── box_iou_rotated_utils.h │ │ ├── deformable │ │ │ ├── deform_conv.h │ │ │ ├── deform_conv_cuda.cu │ │ │ └── deform_conv_cuda_kernel.cu │ │ ├── nms_rotated │ │ │ ├── nms_rotated.h │ │ │ ├── nms_rotated_cpu.cpp │ │ │ └── nms_rotated_cuda.cu │ │ └── vision.cpp │ ├── deform_conv.py │ ├── generate_union_region.py │ ├── gpu_hangon.py │ ├── mask_ops.py │ ├── move2cpu.py │ ├── nms.py │ ├── numerical_stability_softmax.py │ ├── ops.py │ ├── prefetcher.py │ ├── roi_align.py │ ├── roi_align_rotated.py │ ├── rotated_boxes.py │ ├── shape_spec.py │ ├── spatial_coordinate.py │ ├── weighted_smooth_l1_loss.py │ └── wrappers.py ├── modeling │ ├── __init__.py │ ├── anchor_generator.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── build.py │ │ ├── deeplab.py │ │ ├── deeplabv2.py │ │ ├── fpn.py │ │ ├── resnet.py │ │ └── resnet101.py │ ├── box_regression.py │ ├── matcher.py │ ├── meta_arch │ │ ├── __init__.py │ │ ├── build.py │ │ ├── panoptic_fpn.py │ │ ├── rcnn.py │ │ ├── retinanet.py │ │ └── semantic_seg.py │ ├── poolers.py │ ├── postprocessing.py │ ├── proposal_generator │ │ ├── __init__.py │ │ ├── build.py │ │ ├── proposal_utils.py │ │ ├── rpn.py │ │ ├── rpn_outputs.py │ │ ├── rrpn.py │ │ └── rrpn_outputs.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── box_head.py │ │ ├── cascade_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── keypoint_head.py │ │ ├── mask_head.py │ │ └── roi_heads.py │ ├── sampling.py │ ├── test_time_augmentation.py │ └── weaklygrounding │ │ ├── kac_net.py │ │ ├── loss.py │ │ ├── loss_kac.py │ │ ├── phr_emebdding_kac.py │ │ ├── phrase_embedding_weakly.py │ │ ├── phrase_embedding_weakly_v1.py │ │ ├── vg_detection_weakly.py │ │ ├── vg_detection_weakly_v1.py │ │ ├── 
vg_detection_weakly_v3.py │ │ ├── weakly_visual_grounding_reg_rel.py │ │ └── weakly_visual_grounding_regression.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── boxes.py │ ├── image_list.py │ ├── instances.py │ ├── keypoints.py │ ├── masks.py │ └── rotated_boxes.py └── utils │ ├── README.md │ ├── __init__.py │ ├── collect_env.py │ ├── colormap.py │ ├── comm.py │ ├── env.py │ ├── events.py │ ├── logger.py │ ├── registry.py │ ├── serialize.py │ ├── video_visualizer.py │ └── visualizer.py ├── dev ├── README.md ├── linter.sh ├── parse_results.sh ├── run_inference_tests.sh └── run_instant_tests.sh ├── docs ├── .gitignore ├── Makefile ├── README.md ├── conf.py ├── index.rst ├── modules │ ├── checkpoint.rst │ ├── config.rst │ ├── data.datasets.rst │ ├── data.rst │ ├── data.samplers.rst │ ├── data.transforms.rst │ ├── engine.rst │ ├── evaluation.rst │ ├── index.rst │ ├── layers.rst │ ├── modeling.rst │ ├── solver.rst │ ├── structures.rst │ └── utils.rst ├── notes │ ├── benchmarks.md │ ├── changelog.md │ ├── compatibility.md │ └── index.rst └── tutorials │ ├── configs.md │ ├── data_loading.md │ ├── datasets.md │ ├── extend.md │ ├── getting_started.md │ ├── index.rst │ ├── install.md │ ├── models.md │ └── training.md ├── killpy.sh ├── outputs └── .DS_Store ├── projects ├── DensePose │ ├── README.md │ ├── apply_net.py │ ├── configs │ │ ├── Base-DensePose-RCNN.yaml │ │ ├── densepose_R_50_FPN_s1x.yaml │ │ └── quick_schedules │ │ │ ├── densepose_R_50_FPN_inference_acc_test.yaml │ │ │ ├── densepose_R_50_FPN_instant_test.yaml │ │ │ └── densepose_R_50_FPN_training_acc_test.yaml │ ├── densepose │ │ ├── __init__.py │ │ ├── config.py │ │ ├── dataset.py │ │ ├── dataset_mapper.py │ │ ├── densepose_coco_evaluation.py │ │ ├── densepose_head.py │ │ ├── evaluator.py │ │ ├── roi_head.py │ │ ├── structures.py │ │ ├── utils │ │ │ ├── dbhelper.py │ │ │ └── logger.py │ │ └── vis │ │ │ ├── base.py │ │ │ ├── bounding_box.py │ │ │ ├── densepose.py │ │ │ └── extractor.py │ ├── doc │ │ ├── TOOL_APPLY_NET.md │ │ └── TOOL_QUERY_DB.md │ ├── query_db.py │ └── train_net.py ├── README.md └── TridentNet │ ├── README.md │ ├── configs │ ├── Base-TridentNet-Fast-C4.yaml │ ├── tridentnet_fast_R_101_C4_3x.yaml │ └── tridentnet_fast_R_50_C4_1x.yaml │ ├── train_net.py │ └── tridentnet │ ├── __init__.py │ ├── config.py │ ├── trident_backbone.py │ ├── trident_conv.py │ ├── trident_rcnn.py │ └── trident_rpn.py ├── scripts ├── demo.sh ├── test.sh ├── test_kac.sh ├── train.sh ├── train_baseline.sh └── train_kac.sh ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── test_anchor_generator.py ├── test_box2box_transform.py ├── test_boxes.py ├── test_checkpoint.py ├── test_config.py ├── test_data_transform.py ├── test_fast_rcnn.py ├── test_mask_ops.py ├── test_nms_rotated.py ├── test_roi_align.py ├── test_roi_align_rotated.py ├── test_roi_heads.py ├── test_roi_pooler.py ├── test_rotated_boxes.py ├── test_rpn.py └── test_visualizer.py ├── tools ├── benchmark.py ├── train_kac.py ├── train_net.py ├── train_weakly_grounding.py ├── visualize_data.py └── visualize_json_results.py └── unit_test.py /.clang-format: -------------------------------------------------------------------------------- 1 | AccessModifierOffset: -1 2 | AlignAfterOpenBracket: AlwaysBreak 3 | AlignConsecutiveAssignments: false 4 | AlignConsecutiveDeclarations: false 5 | AlignEscapedNewlinesLeft: true 6 | AlignOperands: false 7 | AlignTrailingComments: false 8 | AllowAllParametersOfDeclarationOnNextLine: false 9 | 
AllowShortBlocksOnASingleLine: false 10 | AllowShortCaseLabelsOnASingleLine: false 11 | AllowShortFunctionsOnASingleLine: Empty 12 | AllowShortIfStatementsOnASingleLine: false 13 | AllowShortLoopsOnASingleLine: false 14 | AlwaysBreakAfterReturnType: None 15 | AlwaysBreakBeforeMultilineStrings: true 16 | AlwaysBreakTemplateDeclarations: true 17 | BinPackArguments: false 18 | BinPackParameters: false 19 | BraceWrapping: 20 | AfterClass: false 21 | AfterControlStatement: false 22 | AfterEnum: false 23 | AfterFunction: false 24 | AfterNamespace: false 25 | AfterObjCDeclaration: false 26 | AfterStruct: false 27 | AfterUnion: false 28 | BeforeCatch: false 29 | BeforeElse: false 30 | IndentBraces: false 31 | BreakBeforeBinaryOperators: None 32 | BreakBeforeBraces: Attach 33 | BreakBeforeTernaryOperators: true 34 | BreakConstructorInitializersBeforeComma: false 35 | BreakAfterJavaFieldAnnotations: false 36 | BreakStringLiterals: false 37 | ColumnLimit: 80 38 | CommentPragmas: '^ IWYU pragma:' 39 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 40 | ConstructorInitializerIndentWidth: 4 41 | ContinuationIndentWidth: 4 42 | Cpp11BracedListStyle: true 43 | DerivePointerAlignment: false 44 | DisableFormat: false 45 | ForEachMacros: [ FOR_EACH, FOR_EACH_ENUMERATE, FOR_EACH_KV, FOR_EACH_R, FOR_EACH_RANGE, ] 46 | IncludeCategories: 47 | - Regex: '^<.*\.h(pp)?>' 48 | Priority: 1 49 | - Regex: '^<.*' 50 | Priority: 2 51 | - Regex: '.*' 52 | Priority: 3 53 | IndentCaseLabels: true 54 | IndentWidth: 2 55 | IndentWrappedFunctionNames: false 56 | KeepEmptyLinesAtTheStartOfBlocks: false 57 | MacroBlockBegin: '' 58 | MacroBlockEnd: '' 59 | MaxEmptyLinesToKeep: 1 60 | NamespaceIndentation: None 61 | ObjCBlockIndentWidth: 2 62 | ObjCSpaceAfterProperty: false 63 | ObjCSpaceBeforeProtocolList: false 64 | PenaltyBreakBeforeFirstCallParameter: 1 65 | PenaltyBreakComment: 300 66 | PenaltyBreakFirstLessLess: 120 67 | PenaltyBreakString: 1000 68 | PenaltyExcessCharacter: 1000000 69 | PenaltyReturnTypeOnItsOwnLine: 200 70 | PointerAlignment: Left 71 | ReflowComments: true 72 | SortIncludes: true 73 | SpaceAfterCStyleCast: false 74 | SpaceBeforeAssignmentOperators: true 75 | SpaceBeforeParens: ControlStatements 76 | SpaceInEmptyParentheses: false 77 | SpacesBeforeTrailingComments: 1 78 | SpacesInAngles: false 79 | SpacesInContainerLiterals: true 80 | SpacesInCStyleCastParentheses: false 81 | SpacesInParentheses: false 82 | SpacesInSquareBrackets: false 83 | Standard: Cpp11 84 | TabWidth: 8 85 | UseTab: Never 86 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | # This is an example .flake8 config, used when developing *Black* itself. 2 | # Keep in sync with setup.cfg which is used for source packages. 
3 | 4 | [flake8] 5 | ignore = W503, E203, E221, C901 6 | max-line-length = 100 7 | max-complexity = 18 8 | select = B,C,E,F,W,T4,B9 9 | exclude = build,__init__.py 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | instant_test_output 4 | inference_test_output 5 | 6 | .vscode/* 7 | flickr30k_datasets/* 8 | 9 | *.jpg 10 | *.png 11 | *.txt 12 | 13 | # compilation and distribution 14 | __pycache__ 15 | _ext 16 | *.pyc 17 | *.so 18 | detectron2.egg-info/ 19 | build/ 20 | dist/ 21 | 22 | # pytorch/python/numpy formats 23 | *.pth 24 | *.pkl 25 | *.npy 26 | 27 | # ipython/jupyter notebooks 28 | *.ipynb 29 | **/.ipynb_checkpoints/ 30 | 31 | # Editor temporaries 32 | *.swn 33 | *.swo 34 | *.swp 35 | *~ 36 | 37 | # Pycharm editor settings 38 | .idea 39 | 40 | # project dirs 41 | /datasets 42 | /models 43 | /RefSegDatasets 44 | .github/ 45 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to detectron2 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `master`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: <https://code.facebook.com/cla> 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to detectron2, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. 32 | -------------------------------------------------------------------------------- /GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | ## Relation-aware instance refinement for weakly supervised visual grounding 2 | 3 | ### 1. Build detectron2 by following the official instructions 4 | 5 | ### 2. Training on Flickr30k Entities 6 | ``` 7 | sh scripts/train.sh ## change MODEL.VG.NETWORK from 'RegRel' to 'Baseline', 'Baseline_s2', or 'Reg' to reproduce the ablation studies 8 | ``` 9 | 
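10 | For reference, a minimal sketch of the direct call this script presumably wraps; the `--config-file` flag and trailing `KEY VALUE` config overrides follow the usual detectron2 convention and are an assumption here, not something verified against `scripts/train.sh`: 11 | ``` 12 | # hypothetical direct invocation; prefer scripts/train.sh, which sets any remaining flags 13 | python tools/train_weakly_grounding.py --config-file configs/WeaklyGrounding-RN101-C4.yaml MODEL.VG.NETWORK 'Baseline' 14 | ``` 15 | 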
16 | ### 3. Training for KAC models 17 | 18 | ``` 19 | sh scripts/train_kac.sh ## reproduce the final results 20 | 21 | ``` 22 | 23 | ### 4. Data preparation 24 | 25 | We will release the processed dataset later. -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | Our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) also has step-by-step instructions that install detectron2. 4 | 5 | ### Requirements 6 | - Python >= 3.6 7 | - PyTorch 1.3 8 | - [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. 9 | You can install them together at [pytorch.org](http://pytorch.org) to make sure of this. 10 | - OpenCV, needed by demo and visualization 11 | - [fvcore](https://github.com/facebookresearch/fvcore/): `pip install 'git+https://github.com/facebookresearch/fvcore'` 12 | - pycocotools: `pip install cython; pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'` 13 | - GCC >= 4.9 14 | - apex, built from source: 15 | `git clone https://www.github.com/nvidia/apex` 16 | `cd apex` 17 | `python setup.py install` 18 | - datasets 19 | 20 | 21 | 22 | ### Build detectron2 23 | 24 | After having the above dependencies, run: 25 | ``` 26 | git clone git@github.com:facebookresearch/detectron2.git 27 | cd detectron2 28 | export TORCH_CUDA_ARCH_LIST="Kepler;Kepler+Tesla;Maxwell;Maxwell+Tegra;Pascal;Volta;Turing" ## compile for every platform 29 | python setup.py build develop 30 | 31 | # or if you are on macOS 32 | # MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop 33 | 34 | # or, as an alternative to `setup.py`, do 35 | # pip install . 36 | ``` 37 | Note: you may need to rebuild detectron2 after reinstalling a different build of PyTorch. 38 | 39 | ### Common Installation Issues 40 | 41 | + Undefined torch/aten symbols, or segmentation fault immediately when running the library. 42 | This may mean one of two things: 43 | 44 | * detectron2 or torchvision is not compiled with the version of PyTorch you're running. 45 | 46 | If you use a pre-built torchvision, uninstall torchvision & pytorch, and reinstall them 47 | following [pytorch.org](http://pytorch.org). 48 | If you manually build detectron2 or torchvision, remove the files you built (`build/`, `**/*.so`) 49 | and rebuild them. 50 | 51 | * detectron2 or torchvision is not compiled using gcc >= 4.9. 52 | 53 | You'll see a warning message during compilation in this case. Please remove the files you built, 54 | and rebuild them. 55 | Technically, you need the identical compiler that's used to build pytorch to guarantee 56 | compatibility. But in practice, gcc >= 4.9 should work OK. 57 | 58 | + Undefined cuda symbols. The version of NVCC you use to build detectron2 or torchvision does 59 | not match the version of cuda you are running with. 60 | This happens sometimes when using anaconda. 61 | 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Relation-aware instance refinement for weakly supervised visual grounding 2 | 3 | ### 1. Build detectron2 by following the official instructions 4 | 5 | ### 2. Training on Flickr30k Entities 6 | ``` 7 | sh scripts/train.sh ## change MODEL.VG.NETWORK from 'RegRel' to 'Baseline', 'Baseline_s2', or 'Reg' to reproduce the ablation studies 8 | ``` 9 | 
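10 | A trained model can then presumably be evaluated with the bundled scripts; each script hard-codes the checkpoint and config it loads, so treat the line below as a sketch rather than a verified command: 11 | ``` 12 | sh scripts/test.sh ## evaluation for the grounding model; scripts/test_kac.sh is presumably the KAC counterpart 13 | ``` 14 | 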
15 | ### 3. Training for KAC models 16 | 17 | ``` 18 | sh scripts/train_kac.sh ## reproduce the final results 19 | 20 | ``` 21 | 22 | ### 4. Data preparation 23 | 24 | We will release the processed dataset later. -------------------------------------------------------------------------------- /configs/Base-RCNN-C4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "Res5ROIHeads" 8 | DATASETS: 9 | TRAIN: ("coco_2017_train",) 10 | TEST: ("coco_2017_val",) 11 | SOLVER: 12 | IMS_PER_BATCH: 16 13 | BASE_LR: 0.02 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | INPUT: 17 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 18 | 19 | -------------------------------------------------------------------------------- /configs/Base-RCNN-DilatedC5.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RESNETS: 4 | OUT_FEATURES: ["res5"] 5 | RES5_DILATION: 2 6 | RPN: 7 | IN_FEATURES: ["res5"] 8 | PRE_NMS_TOPK_TEST: 6000 9 | POST_NMS_TOPK_TEST: 1000 10 | ROI_HEADS: 11 | NAME: "StandardROIHeads" 12 | IN_FEATURES: ["res5"] 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_FC: 2 16 | POOLER_RESOLUTION: 7 17 | ROI_MASK_HEAD: 18 | NAME: "MaskRCNNConvUpsampleHead" 19 | NUM_CONV: 4 20 | POOLER_RESOLUTION: 14 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 31 | -------------------------------------------------------------------------------- /configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | -------------------------------------------------------------------------------- /configs/Base-RetinaNet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | BACKBONE: 4 | NAME: "build_retinanet_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | ANCHOR_GENERATOR: 8 | SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] 9 | FPN: 10 | IN_FEATURES: ["res3", "res4", "res5"] 11 | RETINANET: 12 | IOU_THRESHOLDS: [0.4, 0.5] 13 | IOU_LABELS: [0, -1, 1] 14 | DATASETS: 15 | TRAIN: ("coco_2017_train",) 16 | TEST: ("coco_2017_val",) 17 | SOLVER: 18 | IMS_PER_BATCH: 16 19 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 20 | STEPS: (60000, 80000) 21 | MAX_ITER: 90000 22 | INPUT: 23 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 24 | -------------------------------------------------------------------------------- /configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | LOAD_PROPOSALS: True 6 | RESNETS: 7 | DEPTH: 50 8 | PROPOSAL_GENERATOR: 9 | NAME: "PrecomputedProposals" 10 | DATASETS: 11 | TRAIN: ("coco_2017_train",) 12 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) 13 | TEST: ("coco_2017_val",) 14 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 15 | DATALOADER: 16 | # proposals are part of the dataset_dicts, and take a lot of RAM 17 | NUM_WORKERS: 2 18 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 
2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2014_train",) 9 | TEST: ("coco_2014_val",) 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: False 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | 
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Detection/rpn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | PRE_NMS_TOPK_TEST: 12000 10 | POST_NMS_TOPK_TEST: 2000 11 | -------------------------------------------------------------------------------- /configs/COCO-Detection/rpn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | POST_NMS_TOPK_TEST: 2000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | 
DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | KEYPOINT_ON: True 4 | ROI_HEADS: 5 | NUM_CLASSES: 1 6 | ROI_BOX_HEAD: 7 | SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss 8 | RPN: 9 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. 10 | # 1000 proposals per-image is found to hurt box AP. 11 | # Therefore we increase it to 1500 per-image. 
12 | POST_NMS_TOPK_TRAIN: 1500 13 | DATASETS: 14 | TRAIN: ("keypoints_coco_2017_train",) 15 | TEST: ("keypoints_coco_2017_val",) 16 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (210000, 250000) 12 | MAX_ITER: 270000 13 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | MASK_ON: True 5 | SEM_SEG_HEAD: 6 | LOSS_WEIGHT: 0.5 7 | DATASETS: 8 | TRAIN: ("coco_2017_train_panoptic_separated",) 9 | TEST: ("coco_2017_val_panoptic_separated",) 10 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | 
SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/Cityscapes/mask_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | # For better, more stable performance initialize from COCO 5 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" 6 | MASK_ON: True 7 | ROI_HEADS: 8 | NUM_CLASSES: 8 9 | # This is the setting used in Mask R-CNN paper, Appendix A 10 | INPUT: 11 | MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) 12 | MIN_SIZE_TRAIN_SAMPLING: "choice" 13 | MIN_SIZE_TEST: 1024 14 | MAX_SIZE_TRAIN: 2048 15 | MAX_SIZE_TEST: 2048 16 | DATASETS: 17 | TRAIN: ("cityscapes_fine_instance_seg_train",) 18 | TEST: ("cityscapes_fine_instance_seg_val",) 19 | SOLVER: 20 | BASE_LR: 0.01 21 | STEPS: (18000,) 22 | MAX_ITER: 24000 23 | IMS_PER_BATCH: 8 24 | TEST: 25 | EVAL_PERIOD: 8000 26 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | # Detectron1 uses smooth L1 loss with some magic beta values. 8 | # The defaults are changed to L1 loss in Detectron2. 9 | RPN: 10 | SMOOTH_L1_BETA: 0.1111 11 | ROI_BOX_HEAD: 12 | SMOOTH_L1_BETA: 1.0 13 | POOLER_SAMPLING_RATIO: 2 14 | POOLER_TYPE: "ROIAlign" 15 | INPUT: 16 | # no scale augmentation 17 | MIN_SIZE_TRAIN: (800, ) 18 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1 9 | ROI_KEYPOINT_HEAD: 10 | POOLER_RESOLUTION: 14 11 | POOLER_SAMPLING_RATIO: 2 12 | POOLER_TYPE: "ROIAlign" 13 | # Detectron1 uses smooth L1 loss with some magic beta values. 14 | # The defaults are changed to L1 loss in Detectron2. 15 | ROI_BOX_HEAD: 16 | SMOOTH_L1_BETA: 1.0 17 | POOLER_SAMPLING_RATIO: 2 18 | POOLER_TYPE: "ROIAlign" 19 | RPN: 20 | SMOOTH_L1_BETA: 0.1111 21 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 22 | # 1000 proposals per-image is found to hurt box AP. 23 | # Therefore we increase it to 1500 per-image. 24 | POST_NMS_TOPK_TRAIN: 1500 25 | DATASETS: 26 | TRAIN: ("keypoints_coco_2017_train",) 27 | TEST: ("keypoints_coco_2017_val",) 28 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | # Detectron1 uses smooth L1 loss with some magic beta values. 8 | # The defaults are changed to L1 loss in Detectron2. 
9 | RPN: 10 | SMOOTH_L1_BETA: 0.1111 11 | ROI_BOX_HEAD: 12 | SMOOTH_L1_BETA: 1.0 13 | POOLER_SAMPLING_RATIO: 2 14 | POOLER_TYPE: "ROIAlign" 15 | ROI_MASK_HEAD: 16 | POOLER_SAMPLING_RATIO: 2 17 | POOLER_TYPE: "ROIAlign" 18 | INPUT: 19 | # no scale augmentation 20 | MIN_SIZE_TRAIN: (800, ) 21 | -------------------------------------------------------------------------------- /configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 1230 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v0.5_train",) 14 | TEST: ("lvis_v0.5_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | DATALOADER: 18 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 19 | REPEAT_THRESHOLD: 0.001 20 | -------------------------------------------------------------------------------- /configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1230 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v0.5_train",) 14 | TEST: ("lvis_v0.5_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | DATALOADER: 18 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 19 | REPEAT_THRESHOLD: 0.001 20 | -------------------------------------------------------------------------------- /configs/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | ROI_HEADS: 12 | NUM_CLASSES: 1230 13 | SCORE_THRESH_TEST: 0.0001 14 | INPUT: 15 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 16 | DATASETS: 17 | TRAIN: ("lvis_v0.5_train",) 18 | TEST: ("lvis_v0.5_val",) 19 | TEST: 20 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 21 | DATALOADER: 22 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 23 | REPEAT_THRESHOLD: 0.001 24 | -------------------------------------------------------------------------------- /configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NAME: CascadeROIHeads 9 | ROI_BOX_HEAD: 10 | CLS_AGNOSTIC_BBOX_REG: True 11 | RPN: 12 | POST_NMS_TOPK_TRAIN: 2000 13 | -------------------------------------------------------------------------------- /configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: 
"catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 152 10 | DEFORM_ON_PER_STAGE: [False, True, True, True] 11 | ROI_HEADS: 12 | NAME: "CascadeROIHeads" 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_CONV: 4 16 | NUM_FC: 1 17 | NORM: "GN" 18 | CLS_AGNOSTIC_BBOX_REG: True 19 | ROI_MASK_HEAD: 20 | NUM_CONV: 8 21 | NORM: "GN" 22 | RPN: 23 | POST_NMS_TOPK_TRAIN: 2000 24 | SOLVER: 25 | IMS_PER_BATCH: 128 26 | STEPS: (35000, 45000) 27 | MAX_ITER: 50000 28 | BASE_LR: 0.16 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 864) 31 | MIN_SIZE_TRAIN_SAMPLING: "range" 32 | MAX_SIZE_TRAIN: 1440 33 | CROP: 34 | ENABLED: True 35 | TEST: 36 | EVAL_PERIOD: 2500 37 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_BOX_HEAD: 8 | CLS_AGNOSTIC_BBOX_REG: True 9 | ROI_MASK_HEAD: 10 | CLS_AGNOSTIC_MASK: True 11 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 8 | DEFORM_MODULATED: False 9 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | NORM: "GN" 8 | STRIDE_IN_1X1: False 9 | FPN: 10 | NORM: "GN" 11 | ROI_BOX_HEAD: 12 | NAME: "FastRCNNConvFCHead" 13 | NUM_CONV: 4 14 | NUM_FC: 1 15 | NORM: "GN" 16 | ROI_MASK_HEAD: 17 | NORM: "GN" 18 | SOLVER: 19 | # 3x schedule 20 | STEPS: (210000, 250000) 21 | MAX_ITER: 270000 22 | -------------------------------------------------------------------------------- /configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml: -------------------------------------------------------------------------------- 1 | # A large PanopticFPN for demo purposes. 2 | # Use GN on backbone to support semantic seg. 3 | # Use Cascade + Deform Conv to improve localization. 
4 | _BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" 5 | MODEL: 6 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" 7 | RESNETS: 8 | DEPTH: 101 9 | NORM: "GN" 10 | DEFORM_ON_PER_STAGE: [False, True, True, True] 11 | STRIDE_IN_1X1: False 12 | FPN: 13 | NORM: "GN" 14 | ROI_HEADS: 15 | NAME: CascadeROIHeads 16 | ROI_BOX_HEAD: 17 | CLS_AGNOSTIC_BBOX_REG: True 18 | ROI_MASK_HEAD: 19 | NORM: "GN" 20 | RPN: 21 | POST_NMS_TOPK_TRAIN: 2000 22 | SOLVER: 23 | STEPS: (105000, 125000) 24 | MAX_ITER: 135000 25 | IMS_PER_BATCH: 32 26 | BASE_LR: 0.04 27 | -------------------------------------------------------------------------------- /configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" 2 | # INPUT: 3 | # It makes sense to divide by STD when training from scratch, 4 | # but it seems to make no difference on the results and C2's models didn't do this. 5 | # So we keep things consistent with C2. 6 | # PIXEL_STD: [57.375, 57.12, 58.395] 7 | MODEL: 8 | WEIGHTS: "" 9 | MASK_ON: True 10 | BACKBONE: 11 | FREEZE_AT: 0 12 | -------------------------------------------------------------------------------- /configs/Misc/semantic_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_train_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_panoptic_stuffonly",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | -------------------------------------------------------------------------------- /configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 20 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') 14 | TEST: ('voc_2007_test',) 15 | SOLVER: 16 | STEPS: (12000, 16000) 17 | MAX_ITER: 18000 # 17.4 epochs 18 | WARMUP_ITERS: 100 19 | -------------------------------------------------------------------------------- /configs/RefSeg_deeplab_Dilate_1x_graph.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "StandardPooler" 8 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | MASK_ON: False 10 | RESNETS: 11 | DEPTH: 50 12 | PRINT_WINDOWSIZE: 13 | VIS_TRAIN: 100 14 | VIS_TEST: 100 15 | VG: 16 | SPATIAL_FEAT: True 17 | PHRASE_SELECT_TYPE: "Sum" 18 | MAX_PHRASE_LEN: 5 19 | VOCAB_FILE: "./RefSegDatasets/refseg_anno_new/refcoco+/vocab.json" 20 | FEAT_SOURCE: 'bbox' 21 | USE_EDGE_STRUCT: True 22 | USE_REL_FEAT: True 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 7 25 | POOLER_SAMPLING_RATIO: 2 26 | 27 | BACKBONE: 28 | NAME: "build_deeplabv2_backbone" 29 | PRETRAIN_PATH: './RefSegDatasets/refseg_anno_new/backbone_weight/deeplabv2.pth' 30 | 31 | 32 | 33 | DATASETS: 34 | TRAIN: ("refcoco+_train",) 35 | TEST: ("refcoco+_val",) 36 | 37 | SOLVER: 38 |
MASK_SOFTMAX: True 39 | LR_SCHEDULER_NAME: "WarmupMultiStepLR" ## "WarmupPolyLR" 40 | IMS_PER_BATCH: 8 41 | BASE_LR: 0.02 42 | STEPS: (20000, 40000) 43 | MAX_ITER: 1000 44 | WEIGHT_DECAY: 0.0005 45 | WEIGHT_DECAY_BIAS: 0.0005 46 | BIAS_LR_FACTOR: 1.0 47 | WEIGHT_DECAY_NORM: 0.0005 48 | WARMUP_ITERS: 1000 49 | WARMUP_FACTOR: 0.001 50 | POLY_POWER: 0.9 51 | POLY_END_LR: 0.00001 52 | CHECKPOINT_PERIOD: 4000 53 | BACKBONE_LR_FACTOR: 0.1 54 | FIX_BACKBONE: False 55 | FIX_BACKBONE_BN: False 56 | USING_APEX: False 57 | OPTIMIZER: "Adam" # Sgd 58 | INIT_PARA: True 59 | 60 | INPUT: 61 | MIN_SIZE_TRAIN: (321,) 62 | MIN_SIZE_TEST: 321 63 | 64 | TEST: 65 | EVAL_PERIOD: 4000 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /configs/WeaklyGrounding-RN101-C4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "StandardPooler" 8 | WEIGHTS: "" 9 | MASK_ON: False 10 | RESNETS: 11 | DEPTH: 101 12 | PRINT_WINDOWSIZE: 13 | VIS_TRAIN: 100 14 | VIS_TEST: 100 15 | VG: 16 | SPATIAL_FEAT: True 17 | PHRASE_SELECT_TYPE: "Sum" 18 | VOCAB_PHR_FILE: "./flickr30k_datasets/skip-thoughts/vocab_phr.json" 19 | VOCAB_FILE: "./flickr30k_datasets/skip-thoughts/vocab.json" 20 | GLOVE_DICT_FILE: './flickr30k_datasets/flickr30k_anno/skip_thoughts_dict.pkl' 21 | VOCAB_EMBEDDING_FILE: './flickr30k_datasets/flickr30k_anno/skip_vocab_embed.pkl' 22 | VOCAB_REL_FILE: "./flickr30k_datasets/skip-thoughts/vocab_rel.json" 23 | ATTR_DICT_PATH: '' 24 | ATTR_VOCAB_PATH: '' 25 | MAX_PHRASE_LEN: 5 26 | EVAL_THRESH: 0.5 27 | PRECOMP_TOPK: 30 28 | USING_ELMO: False 29 | EMBEDDING_SOURCE: 'Sent' 30 | LSTM_BIDIRECTION: False 31 | USING_DET_KNOWLEDGE: False 32 | USING_DET_SCORE: False 33 | NUM_CST_PHR: 1 34 | USING_CST_RCONST: False 35 | S2_TOPK: 10 36 | S3_TOPK: 6 37 | REGRESSION: False 38 | ML_REG: False 39 | REG_LOSS_FACTOR: 0.1 40 | REG_IOU: 0.7 41 | NON_MAX_REG_LOSS: False 42 | REG_GAP_SCORE: 0.3 43 | NETWORK: 'Baseline' 44 | REG_THRESH: 0.5 45 | LOSS_STAGE_FACTOR: 0.5 46 | ATTR_LOSS_FACTOR: 1.0 47 | REL_CLS_LOSS_FACTOR: 1.0 48 | SEMANTIC_NOUNS_TOPK: 600 49 | SEMANTIC_NOUNS_PATH: './flickr30k_datasets/flickr30k_anno/noun_cates.json' 50 | SEMANTIC_ATTR_TOPK: 79 51 | SEMANTIC_ATTR_PATH: './flickr30k_datasets/flickr30k_anno/attr_cates.json' 52 | SEM_NOUNS_LOSS_FACTOR: 1.0 53 | MESSAGE_PARAM: False 54 | 55 | RELATION: 56 | IS_ON: False 57 | INTRA_LAN: False 58 | VIG_ON: False 59 | JOINT_TRANS: False 60 | REL_CATE_PATH: './flickr30k_datasets/flickr30k_anno/rel_cates.json' 61 | REL_LOSS_FACTOR: 1.0 62 | 63 | 64 | ROI_BOX_HEAD: 65 | POOLER_RESOLUTION: 7 66 | POOLER_SAMPLING_RATIO: 2 67 | 68 | BACKBONE: 69 | NAME: "build_resnet101_backbone" 70 | PRETRAIN_PATH: './flickr30k_datasets/bottom-up-pretrained/bottomup_pretrained_10_100.pth' 71 | PRETRAIN_CLSW_PATH: './flickr30k_datasets/bottom-up-pretrained/bottomup_pretrained_clsw.pth' 72 | LAST_LAYER_STRIDE: 1 73 | 74 | DATALOADER: 75 | ASPECT_RATIO_GROUPING: True 76 | 77 | DATASETS: 78 | TRAIN: ("flickr30k_train",) 79 | TEST: ("flickr30k_val",) 80 | NAME: 'flickr30k' 81 | 82 | SOLVER: 83 | LR_SCHEDULER_NAME: "WarmupMultiStepLR" ## "WarmupPolyLR" 84 | IMS_PER_BATCH: 8 85 | BASE_LR: 0.02 86 | STEPS: (20000, 40000) 87 | MAX_ITER: 1000 88 | REG_START_ITER: 7500 89 | WEIGHT_DECAY: 0.0005 90 | WEIGHT_DECAY_BIAS: 0.0005 91 | BIAS_LR_FACTOR: 1.0 92 | WEIGHT_DECAY_NORM: 0.0005 93 
| WARMUP_ITERS: 500 94 | WARMUP_FACTOR: 0.001 95 | POLY_POWER: 0.9 96 | POLY_END_LR: 0.00001 97 | CHECKPOINT_PERIOD: 4000 98 | BACKBONE_LR_FACTOR: 0.1 99 | FIX_BACKBONE: True 100 | USING_APEX: False 101 | OPTIMIZER: "Adam" # Sgd 102 | INIT_PARA: False 103 | ATTR_CLS_LOSS: True 104 | 105 | 106 | INPUT: 107 | MIN_SIZE_TRAIN: (321,) 108 | MIN_SIZE_TEST: 321 109 | 110 | SEED: 10 111 | 112 | TEST: 113 | EVAL_PERIOD: 4000 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /configs/quick_schedules/README.md: -------------------------------------------------------------------------------- 1 | These are quick configs for performance or accuracy regression tracking purposes. 2 | -------------------------------------------------------------------------------- /configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 7 | TEST: ("coco_2017_val_100",) 8 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 9 | SOLVER: 10 | BASE_LR: 0.005 11 | STEPS: (30,) 12 | MAX_ITER: 40 13 | IMS_PER_BATCH: 4 14 | DATALOADER: 15 | NUM_WORKERS: 2 16 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" 4 | DATASETS: 5 | TEST: ("keypoints_coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | DATASETS: 6 | TRAIN: ("keypoints_coco_2017_val_100",) 7 | TEST: ("keypoints_coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.005 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | NUM_CLASSES: 1 10 | ROI_KEYPOINT_HEAD: 11 | POOLER_RESOLUTION: 14 12 | POOLER_SAMPLING_RATIO: 2 13 | NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False 14 | LOSS_WEIGHT: 4.0 15 | ROI_BOX_HEAD: 16 | SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss 17 | RPN: 18 | SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss 19 | DATASETS: 20 | TRAIN: ("keypoints_coco_2017_val",) 21 | TEST: ("keypoints_coco_2017_val",) 22 | INPUT: 23 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 24 | SOLVER: 25 | WARMUP_FACTOR: 0.33333333 26 | WARMUP_ITERS: 100 27 | STEPS: (5500, 5800) 28 | MAX_ITER: 6000 29 | TEST: 30 | EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]] 31 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | NUM_CLASSES: 1 10 | ROI_KEYPOINT_HEAD: 11 | POOLER_RESOLUTION: 14 12 | POOLER_SAMPLING_RATIO: 2 13 | ROI_BOX_HEAD: 14 | SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss 15 | RPN: 16 | SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss 17 | DATASETS: 18 | TRAIN: ("keypoints_coco_2017_val",) 19 | TEST: ("keypoints_coco_2017_val",) 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | SOLVER: 23 | WARMUP_FACTOR: 0.33333333 24 | WARMUP_ITERS: 100 25 | STEPS: (5500, 5800) 26 | MAX_ITER: 6000 27 | TEST: 28 | EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] 29 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.001 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | ROI_HEADS: 5 | BATCH_SIZE_PER_IMAGE: 256 6 | MASK_ON: True 7 | DATASETS: 8 | TRAIN: ("coco_2017_val",) 9 | TEST: ("coco_2017_val",) 10 | INPUT: 11 | 
MIN_SIZE_TRAIN: (600,) 12 | MAX_SIZE_TRAIN: 1000 13 | MIN_SIZE_TEST: 800 14 | MAX_SIZE_TEST: 1000 15 | SOLVER: 16 | IMS_PER_BATCH: 8 # base uses 16 17 | WARMUP_FACTOR: 0.33333 18 | WARMUP_ITERS: 100 19 | STEPS: (11000, 11600) 20 | MAX_ITER: 12000 21 | TEST: 22 | EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] 23 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02]] 8 | # expected results do not use test-time augmentation. TTA results are not verified. 9 | AUG: 10 | ENABLED: True 11 | MIN_SIZES: (400, 500) # to save some time 12 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.005 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | ROI_HEADS: 5 | BATCH_SIZE_PER_IMAGE: 256 6 | MASK_ON: True 7 | DATASETS: 8 | TRAIN: ("coco_2017_val",) 9 | TEST: ("coco_2017_val",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (600,) 12 | MAX_SIZE_TRAIN: 1000 13 | MIN_SIZE_TEST: 800 14 | MAX_SIZE_TEST: 1000 15 | SOLVER: 16 | WARMUP_FACTOR: 0.3333333 17 | WARMUP_ITERS: 100 18 | STEPS: (5500, 5800) 19 | MAX_ITER: 6000 20 | TEST: 21 | EXPECTED_RESULTS: [["bbox", "AP", 42.8, 0.8], ["segm", "AP", 35.7, 0.8]] 22 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100_panoptic_separated",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 
43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: True 6 | RESNETS: 7 | DEPTH: 50 8 | SEM_SEG_HEAD: 9 | LOSS_WEIGHT: 0.5 10 | DATASETS: 11 | TRAIN: ("coco_2017_val_100_panoptic_separated",) 12 | TEST: ("coco_2017_val_100_panoptic_separated",) 13 | SOLVER: 14 | BASE_LR: 0.005 15 | STEPS: (30,) 16 | MAX_ITER: 40 17 | IMS_PER_BATCH: 4 18 | DATALOADER: 19 | NUM_WORKERS: 2 20 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: True 6 | RESNETS: 7 | DEPTH: 50 8 | SEM_SEG_HEAD: 9 | LOSS_WEIGHT: 0.5 10 | DATASETS: 11 | TRAIN: ("coco_2017_val_panoptic_separated",) 12 | TEST: ("coco_2017_val_panoptic_separated",) 13 | SOLVER: 14 | BASE_LR: 0.01 15 | WARMUP_FACTOR: 0.001 16 | WARMUP_ITERS: 500 17 | STEPS: (5500,) 18 | MAX_ITER: 7000 19 | TEST: 20 | EXPECTED_RESULTS: [["bbox", "AP", 46.80, 1.1], ["segm", "AP", 38.93, 0.7], ["sem_seg", "mIoU", 63.99, 0.9], ["panoptic_seg", "PQ", 48.23, 0.8]] 21 | -------------------------------------------------------------------------------- /configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/137849486/model_final_4cafe0.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 44.36, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | TEST: ("coco_2017_val_100",) 7 | SOLVER: 8 | BASE_LR: 0.005 9 | STEPS: (30,) 10 | MAX_ITER: 40 11 | IMS_PER_BATCH: 4 12 | DATALOADER: 13 | NUM_WORKERS: 2 14 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | TEST: ("coco_2017_val_100",) 7 | SOLVER: 8 | STEPS: (30,) 9 | MAX_ITER: 40 10 | BASE_LR: 0.005 11 | IMS_PER_BATCH: 4 12 | DATALOADER: 13 | NUM_WORKERS: 2 14 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TEST: ("coco_2017_val_100_panoptic_stuffonly",) 9 | TEST: 10 | EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] 11 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_100_panoptic_stuffonly",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | SOLVER: 13 | BASE_LR: 0.005 14 | STEPS: (30,) 15 | MAX_ITER: 40 16 | IMS_PER_BATCH: 4 17 | DATALOADER: 18 | NUM_WORKERS: 2 19 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_val_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_panoptic_stuffonly",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WARMUP_FACTOR: 0.001 13 | WARMUP_ITERS: 300 14 | STEPS: (5500,) 15 | MAX_ITER: 7000 16 | TEST: 17 | EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] 18 | INPUT: 19 | # no scale augmentation 20 | MIN_SIZE_TRAIN: (800, ) 21 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | ## Webcam and Jupyter notebook demo 2 | 3 | This folder contains a simple webcam demo that illustrates how you can use `detectron2` for inference. 
4 | 5 | You can start it by running it from this folder, using one of the following commands: 6 | ```bash 7 | # by default, it runs on the GPU 8 | # for best results, use min-image-size 800 9 | python webcam.py --min-image-size 800 10 | # can also run it on the CPU 11 | python webcam.py --min-image-size 300 MODEL.DEVICE cpu 12 | # or change the model that you want to use 13 | python webcam.py --config-file ../configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml --min-image-size 300 MODEL.DEVICE cpu 14 | # in order to see the probability heatmaps, pass --show-mask-heatmaps 15 | python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu 16 | ``` 17 | -------------------------------------------------------------------------------- /detectron2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .utils.env import setup_environment 4 | 5 | setup_environment() 6 | 7 | 8 | __version__ = "0.1" 9 | -------------------------------------------------------------------------------- /detectron2/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | # File: 4 | 5 | 6 | from . import model_zoo as _UNUSED # register the handler 7 | from .detection_checkpoint import DetectionCheckpointer 8 | from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer 9 | 10 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] 11 | -------------------------------------------------------------------------------- /detectron2/checkpoint/detection_checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import pickle 3 | from fvcore.common.checkpoint import Checkpointer 4 | from fvcore.common.file_io import PathManager 5 | 6 | import detectron2.utils.comm as comm 7 | 8 | from .c2_model_loading import align_and_update_state_dicts 9 | 10 | 11 | class DetectionCheckpointer(Checkpointer): 12 | """ 13 | Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2 14 | model zoo, and apply conversions for legacy models. 
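Example (a minimal usage sketch for illustration; it assumes `model` is a built detectron2 model and `cfg` a loaded config, neither of which is defined here):

        checkpointer = DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR)
        checkpointer.load(cfg.MODEL.WEIGHTS)  # handles zoo .pkl files as well as native .pth checkpoints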
15 | """ 16 | 17 | def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): 18 | is_main_process = comm.is_main_process() 19 | super().__init__( 20 | model, 21 | save_dir, 22 | save_to_disk=is_main_process if save_to_disk is None else save_to_disk, 23 | **checkpointables, 24 | ) 25 | 26 | def _load_file(self, filename): 27 | if filename.endswith(".pkl"): 28 | with PathManager.open(filename, "rb") as f: 29 | data = pickle.load(f, encoding="latin1") 30 | if "model" in data and "__author__" in data: 31 | # file is in Detectron2 model zoo format 32 | self.logger.info("Reading a file from '{}'".format(data["__author__"])) 33 | return data 34 | else: 35 | # assume file is from Caffe2 / Detectron1 model zoo 36 | if "blobs" in data: 37 | # Detection models have "blobs", but ImageNet models don't 38 | data = data["blobs"] 39 | data = {k: v for k, v in data.items() if not k.endswith("_momentum")} 40 | return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} 41 | 42 | loaded = super()._load_file(filename) # load native pth checkpoint 43 | if "model" not in loaded: 44 | loaded = {"model": loaded} 45 | return loaded 46 | 47 | def _load_model(self, checkpoint): 48 | if checkpoint.get("matching_heuristics", False): 49 | self._convert_ndarray_to_tensor(checkpoint["model"]) 50 | # convert weights by name-matching heuristics 51 | model_state_dict = self.model.state_dict() 52 | align_and_update_state_dicts( 53 | model_state_dict, 54 | checkpoint["model"], 55 | c2_conversion=checkpoint.get("__author__", None) == "Caffe2", 56 | ) 57 | checkpoint["model"] = model_state_dict 58 | # for non-caffe2 models, use standard ways to load it 59 | super()._load_model(checkpoint) 60 | -------------------------------------------------------------------------------- /detectron2/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .compat import downgrade_config, upgrade_config 3 | from .config import CfgNode, get_cfg, global_cfg, set_global_cfg 4 | 5 | 6 | __all__ = [ 7 | "CfgNode", 8 | "get_cfg", 9 | "global_cfg", 10 | "set_global_cfg", 11 | "downgrade_config", 12 | "upgrade_config", 13 | ] 14 | -------------------------------------------------------------------------------- /detectron2/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from . import transforms # isort:skip 3 | 4 | from .build import ( 5 | build_detection_test_loader, 6 | build_detection_train_loader, 7 | get_detection_dataset_dicts, 8 | load_proposals_into_dataset, 9 | print_instances_class_histogram, 10 | ) 11 | from .catalog import DatasetCatalog, MetadataCatalog 12 | from .common import DatasetFromList, MapDataset 13 | from .dataset_mapper import DatasetMapper 14 | 15 | # ensure the builtin datasets are registered 16 | from . import datasets, samplers # isort:skip 17 | 18 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 19 | -------------------------------------------------------------------------------- /detectron2/data/datasets/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### Common Datasets 4 | 5 | The dataset implemented here do not need to load the data into the final format. 
6 | It should provide the minimal data structure needed to use the dataset, so it can be very efficient. 7 | 8 | For example, for an image dataset, just provide the file names and labels, but don't read the images. 9 | Let the downstream decide how to read. 10 | -------------------------------------------------------------------------------- /detectron2/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .cityscapes import load_cityscapes_instances 3 | from .coco import load_coco_json, load_sem_seg 4 | from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta 5 | from .register_coco import register_coco_instances, register_coco_panoptic_separated 6 | from . import builtin # ensure the builtin datasets are registered 7 | from .refcoco import register_refcoco, load_refcoco_instances 8 | 9 | 10 | __all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] 11 | -------------------------------------------------------------------------------- /detectron2/data/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | from fvcore.common.file_io import PathManager 5 | import os 6 | import numpy as np 7 | import xml.etree.ElementTree as ET 8 | 9 | from detectron2.structures import BoxMode 10 | from detectron2.data import DatasetCatalog, MetadataCatalog 11 | 12 | 13 | __all__ = ["register_pascal_voc"] 14 | 15 | 16 | # fmt: off 17 | CLASS_NAMES = [ 18 | "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", 19 | "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", 20 | "pottedplant", "sheep", "sofa", "train", "tvmonitor", 21 | ] 22 | # fmt: on 23 | 24 | 25 | def load_voc_instances(dirname: str, split: str): 26 | """ 27 | Load Pascal VOC detection annotations to Detectron2 format. 28 | 29 | Args: 30 | dirname: Contains "Annotations", "ImageSets", "JPEGImages" 31 | split (str): one of "train", "test", "val", "trainval" 32 | """ 33 | with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: 34 | fileids = np.loadtxt(f, dtype=np.str) 35 | 36 | dicts = [] 37 | for fileid in fileids: 38 | anno_file = os.path.join(dirname, "Annotations", fileid + ".xml") 39 | jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") 40 | 41 | tree = ET.parse(anno_file) 42 | 43 | r = { 44 | "file_name": jpeg_file, 45 | "image_id": fileid, 46 | "height": int(tree.findall("./size/height")[0].text), 47 | "width": int(tree.findall("./size/width")[0].text), 48 | } 49 | instances = [] 50 | 51 | for obj in tree.findall("object"): 52 | cls = obj.find("name").text 53 | # We include "difficult" samples in training. 54 | # Based on limited experiments, they don't hurt accuracy. 55 | # difficult = int(obj.find("difficult").text) 56 | # if difficult == 1: 57 | # continue 58 | bbox = obj.find("bndbox") 59 | bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]] 60 | # Original annotations are integers in the range [1, W or H] 61 | # Assuming they mean 1-based pixel indices (inclusive), 62 | # a box with annotation (xmin=1, xmax=W) covers the whole image.
63 | # In coordinate space this is represented by (xmin=0, xmax=W) 64 | bbox[0] -= 1.0 65 | bbox[1] -= 1.0 66 | instances.append( 67 | {"category_id": CLASS_NAMES.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS} 68 | ) 69 | r["annotations"] = instances 70 | dicts.append(r) 71 | return dicts 72 | 73 | 74 | def register_pascal_voc(name, dirname, split, year): 75 | DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split)) 76 | MetadataCatalog.get(name).set( 77 | thing_classes=CLASS_NAMES, dirname=dirname, year=year, split=split 78 | ) 79 | -------------------------------------------------------------------------------- /detectron2/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler, DistributedSampler, IterationBasedBatchSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | 5 | __all__ = [ 6 | "GroupedBatchSampler", 7 | "TrainingSampler", 8 | "InferenceSampler", 9 | "RepeatFactorTrainingSampler", 10 | "DistributedSampler", 11 | "IterationBasedBatchSampler" 12 | ] 13 | -------------------------------------------------------------------------------- /detectron2/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | from torch.utils.data.sampler import BatchSampler, Sampler 4 | 5 | 6 | class GroupedBatchSampler(BatchSampler): 7 | """ 8 | Wraps another sampler to yield a mini-batch of indices. 9 | It enforces that each batch only contains elements from the same group. 10 | It also tries to provide mini-batches that follow an ordering as close 11 | as possible to the ordering from the original sampler. 12 | 13 | Arguments: 14 | sampler (Sampler): Base sampler. 15 | group_ids (list[int]): If the sampler produces indices in range [0, N), 16 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 17 | The group ids must be a continuous set of integers starting from 18 | 0, i.e. they must be in the range [0, num_groups). 19 | batch_size (int): Size of mini-batch.
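Example (an illustrative sketch; any base sampler works, a `SequentialSampler` is used here):

        sampler = torch.utils.data.SequentialSampler(range(6))
        batches = list(GroupedBatchSampler(sampler, group_ids=[0, 1, 0, 1, 0, 1], batch_size=2))
        # batches == [[0, 2], [1, 3]]; indices 4 and 5 stay buffered, since neither
        # group fills another complete batch before the base sampler is exhausted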
20 | """ 21 | 22 | def __init__(self, sampler, group_ids, batch_size): 23 | if not isinstance(sampler, Sampler): 24 | raise ValueError( 25 | "sampler should be an instance of " 26 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 27 | ) 28 | self.sampler = sampler 29 | self.group_ids = torch.as_tensor(group_ids) 30 | assert self.group_ids.dim() == 1 31 | self.batch_size = batch_size 32 | self.groups = torch.unique(self.group_ids).sort(0)[0] 33 | # group ids must range in [0, #group) 34 | assert self.groups[0].item() == 0 and self.groups[-1].item() == len(self.groups) - 1 35 | 36 | # buffer the indices of each group until batch size is reached 37 | self.buffer_per_group = [[] for k in self.groups] 38 | 39 | def __iter__(self): 40 | for idx in self.sampler: 41 | group_id = self.group_ids[idx] 42 | group_buffer = self.buffer_per_group[group_id] 43 | group_buffer.append(idx) 44 | if len(group_buffer) == self.batch_size: 45 | yield group_buffer[:] # yield a copy of the list 46 | del group_buffer[:] 47 | -------------------------------------------------------------------------------- /detectron2/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .transform import * 3 | from fvcore.transforms.transform import * 4 | from .transform_gen import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | -------------------------------------------------------------------------------- /detectron2/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .launch import * 4 | from .train_loop import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | 8 | 9 | # prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) 10 | # but still make them available here 11 | from .hooks import * 12 | from .defaults import * 13 | -------------------------------------------------------------------------------- /detectron2/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from .cityscapes_evaluation import CityscapesEvaluator 3 | from .coco_evaluation import COCOEvaluator 4 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset 5 | from .lvis_evaluation import LVISEvaluator 6 | from .panoptic_evaluation import COCOPanopticEvaluator 7 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator 8 | from .sem_seg_evaluation import SemSegEvaluator 9 | from .testing import print_csv_format, verify_results 10 | from .recoco_evaluation_grounding import RECOCOEvaluator 11 | from .flickr30k_evaluation_grounding import FLICKR30KEvaluator 12 | from .flickr30k_evaluation_grounding_lite import FLICKR30KEvaluator as FLICKR30KEvaluatorLite 13 | from .flickr30k_evaluation_grounding_v1 import FLICKR30KEvaluator as FLICKR30KEvaluatorV1 14 | from .flickr30k_evaluation_grounding_reg import FLICKR30KEvaluator as FLICKR30KEvaluatorReg 15 | from .flickr30k_evaluation_kac import FLICKR30KEvaluator as FLICKR30KEvaluatorKAC 16 | from .flickr30k_evaluation_grounding_reg_ml import FLICKR30KEvaluator as FLICKR30KEvaluatorREGML 17 | 18 | 19 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 20 | -------------------------------------------------------------------------------- /detectron2/evaluation/testing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | import numpy as np 4 | import pprint 5 | import sys 6 | from collections import Mapping, OrderedDict 7 | from ..utils.events import training_tags, val_tags 8 | 9 | 10 | def print_csv_format(results): 11 | """ 12 | Print main metrics in a format similar to Detectron, 13 | so that they are easy to copypaste into a spreadsheet. 14 | 15 | Args: 16 | results (OrderedDict[dict]): task_name -> {metric -> score} 17 | """ 18 | assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed 19 | logger = logging.getLogger(__name__) 20 | for task, res in results.items(): 21 | # Don't print "AP-category" metrics since they are usually not tracked. 22 | important_res = [(k, v) for k, v in res.items() if "-" not in k] 23 | logger.info("copypaste: Task: {}".format(task)) 24 | logger.info("copypaste: " + ",".join([k[0] for k in important_res])) 25 | logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) 26 | 27 | 28 | def verify_results(cfg, results): 29 | """ 30 | Args: 31 | results (OrderedDict[dict]): task_name -> {metric -> score} 32 | 33 | Returns: 34 | bool: whether the verification succeeds or not 35 | """ 36 | expected_results = cfg.TEST.EXPECTED_RESULTS 37 | if not len(expected_results): 38 | return True 39 | 40 | ok = True 41 | for task, metric, expected, tolerance in expected_results: 42 | actual = results[task][metric] 43 | if not np.isfinite(actual): 44 | ok = False 45 | diff = abs(actual - expected) 46 | if diff > tolerance: 47 | ok = False 48 | 49 | logger = logging.getLogger(__name__) 50 | if not ok: 51 | logger.error("Result verification failed!") 52 | logger.error("Expected Results: " + str(expected_results)) 53 | logger.error("Actual Results: " + pprint.pformat(results)) 54 | 55 | sys.exit(1) 56 | else: 57 | logger.info("Results verification passed.") 58 | return ok 59 | 60 | 61 | def flatten_results_dict(results): 62 | """ 63 | Expand a hierarchical dict of scalars into a flat dict of scalars. 
64 | If results[k1][k2][k3] = v, the returned dict will have the entry 65 | {"k1/k2/k3": v}. 66 | 67 | Args: 68 | results (dict): 69 | """ 70 | r = {} 71 | for k, v in results.items(): 72 | if isinstance(v, Mapping): 73 | v = flatten_results_dict(v) 74 | for kk, vv in v.items(): 75 | r[k + "/" + kk] = vv 76 | else: 77 | r[k] = v 78 | return r 79 | -------------------------------------------------------------------------------- /detectron2/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm 3 | from .deform_conv import DeformConv, ModulatedDeformConv 4 | from .mask_ops import paste_masks_in_image 5 | from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated 6 | from .roi_align import ROIAlign, roi_align 7 | from .roi_align_rotated import ROIAlignRotated, roi_align_rotated 8 | from .shape_spec import ShapeSpec 9 | from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate 10 | 11 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 12 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | To add a new Op: 4 | 5 | 1. Create a new directory 6 | 2. Implement new ops there 7 | 3. Declare its Python interface in `vision.cpp`. 8 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor ROIAlignRotated_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio); 14 | 15 | at::Tensor ROIAlignRotated_backward_cpu( 16 | const at::Tensor& grad, 17 | const at::Tensor& rois, 18 | const float spatial_scale, 19 | const int pooled_height, 20 | const int pooled_width, 21 | const int batch_size, 22 | const int channels, 23 | const int height, 24 | const int width, 25 | const int sampling_ratio); 26 | 27 | #ifdef WITH_CUDA 28 | at::Tensor ROIAlignRotated_forward_cuda( 29 | const at::Tensor& input, 30 | const at::Tensor& rois, 31 | const float spatial_scale, 32 | const int pooled_height, 33 | const int pooled_width, 34 | const int sampling_ratio); 35 | 36 | at::Tensor ROIAlignRotated_backward_cuda( 37 | const at::Tensor& grad, 38 | const at::Tensor& rois, 39 | const float spatial_scale, 40 | const int pooled_height, 41 | const int pooled_width, 42 | const int batch_size, 43 | const int channels, 44 | const int height, 45 | const int width, 46 | const int sampling_ratio); 47 | #endif 48 | 49 | // Interface for Python 50 | inline at::Tensor ROIAlignRotated_forward( 51 | const at::Tensor& input, 52 | const at::Tensor& rois, 53 | const float spatial_scale, 54 | const int pooled_height, 55 | const int pooled_width, 56 | const int sampling_ratio) { 57 | if (input.type().is_cuda()) { 58 | #ifdef WITH_CUDA 59 | return ROIAlignRotated_forward_cuda( 60 | input, 61 | rois, 62 | spatial_scale, 63 | pooled_height, 64 | pooled_width, 65 | sampling_ratio); 66 | #else 67 | AT_ERROR("Not compiled with GPU support"); 68 | #endif 69 | } 70 | return ROIAlignRotated_forward_cpu( 71 | input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 72 | } 73 | 74 | inline at::Tensor ROIAlignRotated_backward( 75 | const at::Tensor& grad, 76 | const at::Tensor& rois, 77 | const float spatial_scale, 78 | const int pooled_height, 79 | const int pooled_width, 80 | const int batch_size, 81 | const int channels, 82 | const int height, 83 | const int width, 84 | const int sampling_ratio) { 85 | if (grad.type().is_cuda()) { 86 | #ifdef WITH_CUDA 87 | return ROIAlignRotated_backward_cuda( 88 | grad, 89 | rois, 90 | spatial_scale, 91 | pooled_height, 92 | pooled_width, 93 | batch_size, 94 | channels, 95 | height, 96 | width, 97 | sampling_ratio); 98 | #else 99 | AT_ERROR("Not compiled with GPU support"); 100 | #endif 101 | } 102 | return ROIAlignRotated_backward_cpu( 103 | grad, 104 | rois, 105 | spatial_scale, 106 | pooled_height, 107 | pooled_width, 108 | batch_size, 109 | channels, 110 | height, 111 | width, 112 | sampling_ratio); 113 | } 114 | 115 | } // namespace detectron2 116 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor box_iou_rotated_cpu( 8 | const at::Tensor& boxes1, 9 | const at::Tensor& boxes2); 10 | 11 | #ifdef WITH_CUDA 12 | at::Tensor box_iou_rotated_cuda( 13 | const at::Tensor& boxes1, 14 | const at::Tensor& boxes2); 15 | #endif 16 | 17 | // Interface for Python 18 | // inline is needed to prevent multiple function definitions when this header is 19 | // included by different cpps 20 | inline at::Tensor box_iou_rotated( 21 | const at::Tensor& boxes1, 22 | const at::Tensor& boxes2) { 23 | assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); 24 | if (boxes1.device().is_cuda()) { 25 | #ifdef WITH_CUDA 26 | return box_iou_rotated_cuda(boxes1, boxes2); 27 | #else 28 | AT_ERROR("Not compiled with GPU support"); 29 | #endif 30 | } 31 | 32 | return box_iou_rotated_cpu(boxes1, boxes2); 33 | } 34 | 35 | } // namespace detectron2 36 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #include "box_iou_rotated.h" 3 | #include "box_iou_rotated_utils.h" 4 | 5 | namespace detectron2 { 6 | 7 | template <typename T> 8 | void box_iou_rotated_cpu_kernel( 9 | const at::Tensor& boxes1, 10 | const at::Tensor& boxes2, 11 | at::Tensor& ious) { 12 | auto widths1 = boxes1.select(1, 2).contiguous(); 13 | auto heights1 = boxes1.select(1, 3).contiguous(); 14 | auto widths2 = boxes2.select(1, 2).contiguous(); 15 | auto heights2 = boxes2.select(1, 3).contiguous(); 16 | 17 | at::Tensor areas1 = widths1 * heights1; 18 | at::Tensor areas2 = widths2 * heights2; 19 | 20 | auto num_boxes1 = boxes1.size(0); 21 | auto num_boxes2 = boxes2.size(0); 22 | 23 | for (int i = 0; i < num_boxes1; i++) { 24 | for (int j = 0; j < num_boxes2; j++) { 25 | ious[i * num_boxes2 + j] = single_box_iou_rotated<T>( 26 | boxes1[i].data_ptr<T>(), boxes2[j].data_ptr<T>()); 27 | } 28 | } 29 | } 30 | 31 | at::Tensor box_iou_rotated_cpu( 32 | const at::Tensor& boxes1, 33 | const at::Tensor& boxes2) { 34 | auto num_boxes1 = boxes1.size(0); 35 | auto num_boxes2 = boxes2.size(0); 36 | at::Tensor ious = 37 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); 38 | 39 | box_iou_rotated_cpu_kernel<float>(boxes1, boxes2, ious); 40 | 41 | // reshape from 1d array to 2d array 42 | auto shape = std::vector<int64_t>{num_boxes1, num_boxes2}; 43 | return ious.reshape(shape); 44 | } 45 | 46 | } // namespace detectron2 47 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/nms_rotated/nms_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor nms_rotated_cpu( 8 | const at::Tensor& dets, 9 | const at::Tensor& scores, 10 | const float iou_threshold); 11 | 12 | #ifdef WITH_CUDA 13 | at::Tensor nms_rotated_cuda( 14 | const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float iou_threshold); 17 | #endif 18 | 19 | // Interface for Python 20 | // inline is needed to prevent multiple function definitions when this header is 21 | // included by different cpps 22 | inline at::Tensor nms_rotated( 23 | const at::Tensor& dets, 24 | const at::Tensor& scores, 25 | const float iou_threshold) { 26 | assert(dets.device().is_cuda() == scores.device().is_cuda()); 27 | if (dets.device().is_cuda()) { 28 | #ifdef WITH_CUDA 29 | return nms_rotated_cuda(dets, scores, iou_threshold); 30 | #else 31 | AT_ERROR("Not compiled with GPU support"); 32 | #endif 33 | } 34 | 35 | return nms_rotated_cpu(dets, scores, iou_threshold); 36 | } 37 | 38 | } // namespace detectron2 39 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #include "../box_iou_rotated/box_iou_rotated_utils.h" 3 | #include "nms_rotated.h" 4 | 5 | namespace detectron2 { 6 | 7 | template <typename T> 8 | at::Tensor nms_rotated_cpu_kernel( 9 | const at::Tensor& dets, 10 | const at::Tensor& scores, 11 | const float iou_threshold) { 12 | // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel, 13 | // however, the code in this function is much shorter because 14 | // we delegate the IoU computation for rotated boxes to 15 | // the single_box_iou_rotated function in box_iou_rotated_utils.h 16 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 17 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 18 | AT_ASSERTM( 19 | dets.type() == scores.type(), "dets should have the same type as scores"); 20 | 21 | if (dets.numel() == 0) { 22 | return at::empty({0}, dets.options().dtype(at::kLong)); 23 | } 24 | 25 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 26 | 27 | auto ndets = dets.size(0); 28 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); 29 | at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); 30 | 31 | auto suppressed = suppressed_t.data_ptr<uint8_t>(); 32 | auto keep = keep_t.data_ptr<int64_t>(); 33 | auto order = order_t.data_ptr<int64_t>(); 34 | 35 | int64_t num_to_keep = 0; 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) { 40 | continue; 41 | } 42 | 43 | keep[num_to_keep++] = i; 44 | 45 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 46 | auto j = order[_j]; 47 | if (suppressed[j] == 1) { 48 | continue; 49 | } 50 | 51 | auto ovr = single_box_iou_rotated<T>( 52 | dets[i].data_ptr<T>(), dets[j].data_ptr<T>()); 53 | if (ovr >= iou_threshold) { 54 | suppressed[j] = 1; 55 | } 56 | } 57 | } 58 | return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); 59 | } 60 | 61 | at::Tensor nms_rotated_cpu( 62 | const at::Tensor& dets, 63 | const at::Tensor& scores, 64 | const float iou_threshold) { 65 | auto result = at::empty({0}, dets.options()); 66 | 67 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms_rotated", [&] { 68 | result = nms_rotated_cpu_kernel<scalar_t>(dets, scores, iou_threshold); 69 | }); 70 | return result; 71 | } 72 | 73 | } // namespace detectron2 74 | -------------------------------------------------------------------------------- /detectron2/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #include "ROIAlign/ROIAlign.h" 3 | #include "ROIAlignRotated/ROIAlignRotated.h" 4 | #include "box_iou_rotated/box_iou_rotated.h" 5 | #include "deformable/deform_conv.h" 6 | #include "nms_rotated/nms_rotated.h" 7 | 8 | namespace detectron2 { 9 | 10 | // similar to 11 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 12 | std::string get_compiler_version() { 13 | std::ostringstream ss; 14 | #if defined(__GNUC__) 15 | #ifndef __clang__ 16 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 17 | #endif 18 | #endif 19 | 20 | #if defined(__clang_major__) 21 | { 22 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 23 | << __clang_patchlevel__; 24 | } 25 | #endif 26 | 27 | #if defined(_MSC_VER) 28 | { ss << "MSVC " << _MSC_FULL_VER; } 29 | #endif 30 | return ss.str(); 31 | } 32 | 33 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 34 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 35 | 36 | m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes"); 37 | 38 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 39 | m.def( 40 | "deform_conv_backward_input", 41 | &deform_conv_backward_input, 42 | "deform_conv_backward_input"); 43 | m.def( 44 | "deform_conv_backward_filter", 45 | &deform_conv_backward_filter, 46 | "deform_conv_backward_filter"); 47 | m.def( 48 | "modulated_deform_conv_forward", 49 | &modulated_deform_conv_forward, 50 | "modulated_deform_conv_forward"); 51 | m.def( 52 | "modulated_deform_conv_backward", 53 | &modulated_deform_conv_backward, 54 | "modulated_deform_conv_backward"); 55 | 56 | m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes"); 57 | 58 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 59 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 60 | 61 | m.def( 62 | "roi_align_rotated_forward", 63 | &ROIAlignRotated_forward, 64 | "Forward pass for Rotated ROI-Align Operator"); 65 | m.def( 66 | "roi_align_rotated_backward", 67 | &ROIAlignRotated_backward, 68 | "Backward pass for Rotated ROI-Align Operator"); 69 | } 70 | 71 | } // namespace detectron2 72 | -------------------------------------------------------------------------------- /detectron2/layers/gpu_hangon.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.6 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2020/6/25 12:07 4 | 5 | 6 | 7 | from gpustat.core import GPUStatCollection 8 | 9 | def get_gpu_status(): 10 | 11 | gpus_stats = GPUStatCollection.new_query() 12 | info = gpus_stats.jsonify()["gpus"] 13 | gpu_list = [] 14 | 15 | mem_ratio_threshold = 0.1 # a GPU counts as free if <10% of its memory is in use 16 | util_ratio_threshold = 10 # ... and its utilization is below 10% 17 | for idx, each in enumerate(info): 18 | mem_ratio = each["memory.used"] / each["memory.total"] 19 | util_ratio = each["utilization.gpu"] 20 | print(mem_ratio, util_ratio) 21 | if mem_ratio < mem_ratio_threshold and util_ratio < util_ratio_threshold: 22 | gpu_list.append(idx) 23 | print("Scanned GPUs and found {} free GPU(s)".format(len(gpu_list))) 24 | return gpu_list 25 | 26 | 27 | if __name__ == '__main__': 28 | 29 | gpu_list = get_gpu_status() 30 | print(gpu_list) 31 | 32 | 33 | 
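    # Usage sketch: pin this process to the idle GPUs found above. This is an
    # illustration only; it assumes CUDA_VISIBLE_DEVICES is set before torch
    # initializes CUDA, otherwise the restriction has no effect.
    import os
    if gpu_list:
        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_list)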
-------------------------------------------------------------------------------- /detectron2/layers/move2cpu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.6 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2020/6/16 19:05 4 | 5 | import torch 6 | 7 | def move2cpu(data_lists): 8 | data_lists = [data.to(torch.device('cpu')) for data in data_lists] 9 | return data_lists -------------------------------------------------------------------------------- /detectron2/layers/numerical_stability_softmax.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.6 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2019/10/14 22:19 4 | 5 | 6 | 7 | 8 | 9 | import torch 10 | import numpy as np 11 | 12 | 13 | def numerical_stability_softmax(score, dim, epsilon=1e-6): 14 | 15 | max_score, _ = score.max(dim, keepdim=True) 16 | 17 | stable_score = score - max_score 18 | stable_exp = torch.exp(stable_score) 19 | stable_prob = stable_exp / (stable_exp.sum(dim, keepdim=True)+epsilon) 20 | 21 | return stable_prob 22 | 23 | def numerical_stability_masked_softmax(vec, mask, dim=1, epsilon=1e-6): 24 | 25 | masked_vec = vec * mask.float() 26 | max_vec, _ = masked_vec.max(dim, keepdim=True) 27 | stable_vec = vec - max_vec 28 | stable_exps = torch.exp(stable_vec) 29 | masked_exps = stable_exps * mask.float() 30 | masked_sums = masked_exps.sum(dim, keepdim=True) + epsilon 31 | masked_prob = masked_exps / masked_sums 32 | 33 | return masked_prob 34 | 35 | 36 | def numerical_stability_inner_masked_softmax(vec, mask, dim=1, num_phrases=2, topN=10, epsilon=1e-8): 37 | 38 | mask = mask.float() 39 | 40 | if dim==0: 41 | vec = vec.permute(1,0) 42 | mask = mask.permute(1,0) 43 | 44 | masked_inner_vec = vec * mask 45 | masked_inner_vec = masked_inner_vec.contiguous().view(-1, topN) 46 | inner_mask = mask.contiguous().view(-1, topN) 47 | inner_max_vec, _ = masked_inner_vec.max(1, True) 48 | stable_inner_vec = masked_inner_vec - inner_max_vec 49 | stable_inner_exps = torch.exp(stable_inner_vec) 50 | masked_inner_exps = stable_inner_exps * inner_mask.float() 51 | masked_inner_sums = masked_inner_exps.sum(1, keepdim=True) + epsilon 52 | masked_inner_prob = masked_inner_exps / masked_inner_sums ## (np*N*np)*N 53 | 54 | masked_inner_vec_total = masked_inner_vec.sum(1).contiguous().view(num_phrases*topN, num_phrases) 55 | inner_mask_total = inner_mask.sum(1).contiguous().view(num_phrases*topN, num_phrases).ge(1).float() 56 | masked_inner_vec_total = masked_inner_vec_total * inner_mask_total 57 | inner_max_vec_total, _ = masked_inner_vec_total.max(1, True) 58 | stable_inner_exps_total = torch.exp(masked_inner_vec_total-inner_max_vec_total) 59 | masked_inner_exps_total = stable_inner_exps_total * inner_mask_total 60 | masked_inner_sum_total = masked_inner_exps_total.sum(1, keepdim=True) + epsilon ## (np*N)*np 61 | masked_inner_prob_total = masked_inner_exps_total / masked_inner_sum_total 62 | masked_inner_prob_total = masked_inner_prob_total.contiguous().view(-1).unsqueeze(1) ## (np*N*np) *1 63 | 64 | masked_inner_prob = masked_inner_prob * masked_inner_prob_total 65 | masked_inner_prob = masked_inner_prob.contiguous().view(num_phrases*topN, num_phrases*topN) 66 | 67 | if dim == 0: 68 | masked_inner_prob = masked_inner_prob.permute(1,0) 69 | 70 | return masked_inner_prob 71 | 72 | 73 | 74 | 75 | def masked_softmax(vec, mask, dim=1, epsilon=1e-6): 76 | exps = torch.exp(vec) 77 | masked_exps = exps * mask.float() 78 | masked_sums = 
masked_exps.sum(dim, keepdim=True) + epsilon 79 | return (masked_exps/masked_sums) 80 | 81 | if __name__ == '__main__': 82 | 83 | import numpy as np 84 | relation_conn = [[0,1],[0,2]] 85 | topN = 10 86 | conn_map = np.zeros((30, 30)) 87 | 88 | random_matrix = np.random.random((10,10)) 89 | for rel in relation_conn: 90 | conn_map[rel[0]*topN:(rel[0]+1)*topN, rel[1]*topN:(rel[1]+1)*topN] = random_matrix 91 | -------------------------------------------------------------------------------- /detectron2/layers/ops.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.6 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2019/12/11 14:01 4 | 5 | 6 | 7 | import numpy as np 8 | import torch.nn as nn 9 | 10 | class Linear(nn.Linear): 11 | def __init__(self, *args, **kwargs): 12 | super().__init__(*args, **kwargs) 13 | 14 | # compatible with xavier_initializer in TensorFlow 15 | fan_avg = (self.in_features + self.out_features) / 2. 16 | bound = np.sqrt(3. / fan_avg) 17 | nn.init.uniform_(self.weight, -bound, bound) 18 | if self.bias is not None: 19 | nn.init.constant_(self.bias, 0.) -------------------------------------------------------------------------------- /detectron2/layers/rotated_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from __future__ import absolute_import, division, print_function, unicode_literals 3 | 4 | # import torch 5 | from detectron2 import _C 6 | 7 | 8 | def pairwise_iou_rotated(boxes1, boxes2): 9 | """ 10 | Return intersection-over-union (Jaccard index) of boxes. 11 | 12 | Both sets of boxes are expected to be in 13 | (x_center, y_center, width, height, angle) format. 14 | 15 | Arguments: 16 | boxes1 (Tensor[N, 5]) 17 | boxes2 (Tensor[M, 5]) 18 | 19 | Returns: 20 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 21 | IoU values for every element in boxes1 and boxes2 22 | """ 23 | 24 | return _C.box_iou_rotated(boxes1, boxes2) 25 | -------------------------------------------------------------------------------- /detectron2/layers/shape_spec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | from collections import namedtuple 4 | 5 | 6 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 7 | """ 8 | A simple structure that contains basic shape specification about a tensor. 9 | It is often used as the auxiliary inputs/outputs of models, 10 | to complement the lack of shape inference ability among pytorch modules.
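For example (an illustrative sketch), a stride-4 feature map with 256 channels
    whose spatial size is not known in advance can be described as:

        spec = ShapeSpec(channels=256, stride=4)  # height and width default to None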
11 | 12 | Attributes: 13 | channels: 14 | height: 15 | width: 16 | stride: 17 | """ 18 | 19 | def __new__(cls, *, channels=None, height=None, width=None, stride=None): 20 | return super().__new__(cls, channels, height, width, stride) 21 | -------------------------------------------------------------------------------- /detectron2/layers/weighted_smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.6 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2020/6/5 14:43 4 | 5 | 6 | 7 | 8 | import torch 9 | def smooth_l1_loss(input: torch.Tensor, target: torch.Tensor, beta: float, weight: torch.Tensor, reduction: str = "none") -> torch.Tensor: 10 | """ 11 | Smooth L1 loss defined in the Fast R-CNN paper as: 12 | 13 | | 0.5 * x ** 2 / beta if abs(x) < beta 14 | smoothl1(x) = | 15 | | abs(x) - 0.5 * beta otherwise, 16 | 17 | where x = input - target. 18 | 19 | Smooth L1 loss is related to Huber loss, which is defined as: 20 | 21 | | 0.5 * x ** 2 if abs(x) < beta 22 | huber(x) = | 23 | | beta * (abs(x) - 0.5 * beta) otherwise 24 | 25 | Smooth L1 loss is equal to huber(x) / beta. This leads to the following 26 | differences: 27 | 28 | - As beta -> 0, Smooth L1 loss converges to L1 loss, while Huber loss 29 | converges to a constant 0 loss. 30 | - As beta -> +inf, Smooth L1 converges to a constant 0 loss, while Huber loss 31 | converges to L2 loss. 32 | - For Smooth L1 loss, as beta varies, the L1 segment of the loss has a constant 33 | slope of 1. For Huber loss, the slope of the L1 segment is beta. 34 | 35 | Smooth L1 loss can be seen as exactly L1 loss, but with the abs(x) < beta 36 | portion replaced with a quadratic function such that at abs(x) = beta, its 37 | slope is 1. The quadratic segment smooths the L1 loss near x = 0. 38 | 39 | Args: 40 | input (Tensor): input tensor of any shape 41 | target (Tensor): target value tensor with the same shape as input 42 | beta (float): L1 to L2 change point. 43 | For beta values < 1e-5, L1 loss is computed. weight (Tensor): per-row weights of shape (N,); unsqueezed on dim 1 and multiplied into the loss before reduction. 44 | reduction: 'none' | 'mean' | 'sum' 45 | 'none': No reduction will be applied to the output. 46 | 'mean': The output will be averaged. 47 | 'sum': The output will be summed. 48 | 49 | Returns: 50 | The loss with the reduction option applied. 51 | 52 | Note: 53 | PyTorch's builtin "Smooth L1 loss" implementation does not actually 54 | implement Smooth L1 loss, nor does it implement Huber loss. It implements 55 | the special case of both in which they are equal (beta=1). 56 | See: https://pytorch.org/docs/stable/nn.html#torch.nn.SmoothL1Loss. 57 | """ 58 | if beta < 1e-5: 59 | # if beta == 0, then torch.where will result in nan gradients when 60 | # the chain rule is applied due to pytorch implementation details 61 | # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of 62 | # zeros, rather than "no gradient"). To avoid this issue, we define 63 | # small values of beta to be exactly l1 loss. 64 | loss = torch.abs(input - target) 65 | else: 66 | n = torch.abs(input - target) 67 | cond = n < beta 68 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 69 | 70 | loss = loss * weight.unsqueeze(1) # apply the per-row weights, broadcast over the remaining dimension 71 | 72 | if reduction == "mean": 73 | loss = loss.mean() 74 | elif reduction == "sum": 75 | loss = loss.sum() 76 | return loss -------------------------------------------------------------------------------- /detectron2/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | import torch 3 | 4 | from detectron2.layers import ShapeSpec 5 | 6 | from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY 7 | from .backbone import ( 8 | BACKBONE_REGISTRY, 9 | FPN, 10 | Backbone, 11 | ResNet, 12 | ResNetBlockBase, 13 | build_backbone, 14 | build_resnet_backbone, 15 | make_stage, 16 | ) 17 | from .meta_arch import ( 18 | META_ARCH_REGISTRY, 19 | SEM_SEG_HEADS_REGISTRY, 20 | GeneralizedRCNN, 21 | PanopticFPN, 22 | ProposalNetwork, 23 | RetinaNet, 24 | SemanticSegmentor, 25 | build_model, 26 | build_sem_seg_head, 27 | ) 28 | from .postprocessing import detector_postprocess 29 | from .proposal_generator import ( 30 | PROPOSAL_GENERATOR_REGISTRY, 31 | build_proposal_generator, 32 | RPN_HEAD_REGISTRY, 33 | build_rpn_head, 34 | ) 35 | from .roi_heads import ( 36 | ROI_BOX_HEAD_REGISTRY, 37 | ROI_HEADS_REGISTRY, 38 | ROI_KEYPOINT_HEAD_REGISTRY, 39 | ROI_MASK_HEAD_REGISTRY, 40 | ROIHeads, 41 | StandardROIHeads, 42 | build_box_head, 43 | build_keypoint_head, 44 | build_mask_head, 45 | build_roi_heads, 46 | ) 47 | from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA 48 | 49 | _EXCLUDE = {"torch", "ShapeSpec"} 50 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 51 | 52 | assert ( 53 | torch.Tensor([1]) == torch.Tensor([2]) 54 | ).dtype == torch.bool, "Your Pytorch is too old. Please update to contain https://github.com/pytorch/pytorch/pull/21113" 55 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip 3 | 4 | from .backbone import Backbone 5 | from .fpn import FPN 6 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 7 | from .deeplab import build_deeplabv2 8 | from .deeplabv2 import build_deeplabv2_v1 9 | from .resnet101 import build_resnet101 10 | 11 | # TODO can expose more resnet blocks after careful consideration 12 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from abc import ABCMeta, abstractmethod 3 | import torch.nn as nn 4 | 5 | from detectron2.layers import ShapeSpec 6 | 7 | __all__ = ["Backbone"] 8 | 9 | 10 | class Backbone(nn.Module, metaclass=ABCMeta): 11 | """ 12 | Abstract base class for network backbones. 13 | """ 14 | 15 | def __init__(self): 16 | """ 17 | The `__init__` method of any subclass can specify its own set of arguments. 18 | """ 19 | super().__init__() 20 | 21 | @abstractmethod 22 | def forward(self): 23 | """ 24 | Subclasses must override this method, but adhere to the same return type. 25 | 26 | Returns: 27 | dict[str: Tensor]: mapping from feature name (e.g., "res2") to tensor 28 | """ 29 | pass 30 | 31 | @property 32 | def size_divisibility(self): 33 | """ 34 | Some backbones require the input height and width to be divisible by a 35 | specific integer. This is typically true for encoder / decoder type networks 36 | with lateral connection (e.g., FPN) for which feature maps need to match 37 | dimension in the "bottom up" and "top down" paths. 
Set to 0 if no specific 38 | input size divisibility is required. 39 | """ 40 | return 0 41 | 42 | def output_shape(self): 43 | """ 44 | Returns: 45 | dict[str->ShapeSpec] 46 | """ 47 | # this is a backward-compatible default 48 | return { 49 | name: ShapeSpec( 50 | channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] 51 | ) 52 | for name in self._out_features 53 | } 54 | 55 | # the properties below are not used any more 56 | 57 | @property 58 | def out_features(self): 59 | """deprecated""" 60 | return self._out_features 61 | 62 | @property 63 | def out_feature_strides(self): 64 | """deprecated""" 65 | return {f: self._out_feature_strides[f] for f in self._out_features} 66 | 67 | @property 68 | def out_feature_channels(self): 69 | """deprecated""" 70 | return {f: self._out_feature_channels[f] for f in self._out_features} 71 | -------------------------------------------------------------------------------- /detectron2/modeling/backbone/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.utils.registry import Registry 4 | 5 | from .backbone import Backbone 6 | 7 | BACKBONE_REGISTRY = Registry("BACKBONE") 8 | """ 9 | Registry for backbones, which extract feature maps from images. 10 | """ 11 | 12 | 13 | def build_backbone(cfg, input_shape=None): 14 | """ 15 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 16 | 17 | Returns: 18 | an instance of :class:`Backbone` 19 | """ 20 | if input_shape is None: 21 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 22 | 23 | backbone_name = cfg.MODEL.BACKBONE.NAME 24 | backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) 25 | assert isinstance(backbone, Backbone) 26 | return backbone 27 | -------------------------------------------------------------------------------- /detectron2/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | from .build import META_ARCH_REGISTRY, build_model # isort:skip 5 | 6 | from .panoptic_fpn import PanopticFPN 7 | 8 | # import all the meta_arch, so they will be registered 9 | from .rcnn import GeneralizedRCNN, ProposalNetwork 10 | from .retinanet import RetinaNet 11 | from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head 12 | -------------------------------------------------------------------------------- /detectron2/modeling/meta_arch/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from detectron2.utils.registry import Registry 3 | 4 | META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip 5 | """ 6 | Registry for meta-architectures, i.e. the whole model. 7 | """ 8 | 9 | 10 | def build_model(cfg): 11 | """ 12 | Build the whole model, as defined by `cfg.MODEL.META_ARCHITECTURE`. 13 | """ 14 | meta_arch = cfg.MODEL.META_ARCHITECTURE 15 | return META_ARCH_REGISTRY.get(meta_arch)(cfg) 16 | -------------------------------------------------------------------------------- /detectron2/modeling/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from torch.nn import functional as F 3 | 4 | from detectron2.layers import paste_masks_in_image 5 | from detectron2.structures import Instances 6 | 7 | 8 | def detector_postprocess(results, output_height, output_width, mask_threshold=0.5): 9 | """ 10 | Resize the output instances. 11 | The input images are often resized when entering an object detector. 12 | As a result, we often need the outputs of the detector in a different 13 | resolution from its inputs. 14 | 15 | This function will resize the raw outputs of an R-CNN detector 16 | to produce outputs according to the desired output resolution. 17 | 18 | Args: 19 | results (Instances): the raw outputs from the detector. 20 | `results.image_size` contains the input image resolution the detector sees. 21 | This object might be modified in-place. 22 | output_height, output_width: the desired output resolution. 23 | 24 | Returns: 25 | Instances: the resized output from the model, based on the output resolution 26 | """ 27 | scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) 28 | results = Instances((output_height, output_width), **results.get_fields()) 29 | 30 | if results.has("pred_boxes"): 31 | output_boxes = results.pred_boxes 32 | elif results.has("proposal_boxes"): 33 | output_boxes = results.proposal_boxes 34 | 35 | output_boxes.tensor[:, 0::2] *= scale_x 36 | output_boxes.tensor[:, 1::2] *= scale_y 37 | output_boxes.clip(results.image_size) 38 | 39 | results = results[output_boxes.nonempty()] 40 | 41 | if results.has("pred_masks"): 42 | results.pred_masks = paste_masks_in_image( 43 | results.pred_masks[:, 0, :, :], # N, 1, M, M 44 | results.pred_boxes, 45 | results.image_size, 46 | threshold=mask_threshold, 47 | ) 48 | 49 | if results.has("pred_keypoints"): 50 | results.pred_keypoints[:, :, 0] *= scale_x 51 | results.pred_keypoints[:, :, 1] *= scale_y 52 | 53 | return results 54 | 55 | 56 | def sem_seg_postprocess(result, img_size, output_height, output_width): 57 | """ 58 | Return semantic segmentation predictions in the original resolution. 59 | 60 | The input images are often resized when entering the semantic segmentor. Moreover, in some 61 | cases, they are also padded inside the segmentor to be divisible by the maximum network stride. 62 | As a result, we often need the predictions of the segmentor in a different 63 | resolution from its inputs. 64 | 65 | Args: 66 | result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), 67 | where C is the number of classes, and H, W are the height and width of the prediction. 68 | img_size (tuple): image size that the segmentor takes as input. 69 | output_height, output_width: the desired output resolution. 70 | 71 | Returns: 72 | semantic segmentation prediction (Tensor): A tensor of the shape 73 | (C, output_height, output_width) that contains per-pixel soft predictions. 74 | """ 75 | result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) 76 | result = F.interpolate( 77 | result, size=(output_height, output_width), mode="bilinear", align_corners=False 78 | )[0] 79 | return result 80 | -------------------------------------------------------------------------------- /detectron2/modeling/proposal_generator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator 3 | from .rpn import RPN_HEAD_REGISTRY, build_rpn_head 4 | -------------------------------------------------------------------------------- /detectron2/modeling/proposal_generator/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from detectron2.utils.registry import Registry 3 | 4 | PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") 5 | """ 6 | Registry for proposal generators, which produce object proposals from feature maps. 7 | """ 8 | 9 | from . import rpn, rrpn # noqa F401 isort:skip 10 | 11 | 12 | def build_proposal_generator(cfg, input_shape): 13 | """ 14 | Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. 15 | The name can be "PrecomputedProposals" to use no proposal generator. 16 | """ 17 | name = cfg.MODEL.PROPOSAL_GENERATOR.NAME 18 | if name == "PrecomputedProposals": 19 | return None 20 | 21 | return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) 22 | -------------------------------------------------------------------------------- /detectron2/modeling/proposal_generator/proposal_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import math 3 | import torch 4 | 5 | from detectron2.structures import Instances 6 | 7 | 8 | def add_ground_truth_to_proposals(gt_boxes, proposals): 9 | """ 10 | Call `add_ground_truth_to_proposals_single_image` for all images. 11 | 12 | Args: 13 | gt_boxes(list[Boxes]): list of N elements. Element i is a Boxes 14 | representing the ground-truth for image i. 15 | proposals (list[Instances]): list of N elements. Element i is an Instances 16 | representing the proposals for image i. 17 | 18 | Returns: 19 | list[Instances]: list of N Instances. Each is the proposals for the image, 20 | with fields "proposal_boxes" and "objectness_logits". 21 | """ 22 | assert gt_boxes is not None 23 | 24 | assert len(proposals) == len(gt_boxes) 25 | if len(proposals) == 0: 26 | return proposals 27 | 28 | return [ 29 | add_ground_truth_to_proposals_single_image(gt_boxes_i, proposals_i) 30 | for gt_boxes_i, proposals_i in zip(gt_boxes, proposals) 31 | ] 32 | 33 | 34 | def add_ground_truth_to_proposals_single_image(gt_boxes, proposals): 35 | """ 36 | Augment `proposals` with ground-truth boxes from `gt_boxes`. 37 | 38 | Args: 39 | Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals 40 | per image. 41 | 42 | Returns: 43 | Same as `add_ground_truth_to_proposals`, but for only one image. 44 | """ 45 | device = proposals.objectness_logits.device 46 | # Concatenating gt_boxes with proposals requires them to have the same fields 47 | # Assign all ground-truth boxes an objectness logit corresponding to P(object) \approx 1.
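# (With p = 1 - 1e-10, log(p / (1 - p)) is the inverse sigmoid of p,
# i.e. roughly 10 * ln(10) ~= 23, so sigmoid(gt_logit_value) ~= 1 - 1e-10.)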
48 | gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) 49 | 50 | gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) 51 | gt_proposal = Instances(proposals.image_size) 52 | 53 | gt_proposal.proposal_boxes = gt_boxes 54 | gt_proposal.objectness_logits = gt_logits 55 | new_proposals = Instances.cat([proposals, gt_proposal]) 56 | 57 | return new_proposals 58 | -------------------------------------------------------------------------------- /detectron2/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head 3 | from .keypoint_head import ROI_KEYPOINT_HEAD_REGISTRY, build_keypoint_head 4 | from .mask_head import ROI_MASK_HEAD_REGISTRY, build_mask_head 5 | from .roi_heads import ROI_HEADS_REGISTRY, ROIHeads, StandardROIHeads, build_roi_heads 6 | 7 | from . import cascade_rcnn # isort:skip 8 | -------------------------------------------------------------------------------- /detectron2/modeling/roi_heads/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import numpy as np 3 | import fvcore.nn.weight_init as weight_init 4 | import torch 5 | from torch import nn 6 | from torch.nn import functional as F 7 | 8 | from detectron2.layers import Conv2d, ShapeSpec, get_norm 9 | from detectron2.utils.registry import Registry 10 | 11 | ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD") 12 | """ 13 | Registry for box heads, which make box predictions from per-region features. 14 | """ 15 | 16 | 17 | @ROI_BOX_HEAD_REGISTRY.register() 18 | class FastRCNNConvFCHead(nn.Module): 19 | """ 20 | A head with several 3x3 conv layers (each followed by norm & relu) and 21 | several fc layers (each followed by relu). 
22 | """ 23 | 24 | def __init__(self, cfg, input_shape: ShapeSpec): 25 | """ 26 | The following attributes are parsed from config: 27 | num_conv, num_fc: the number of conv/fc layers 28 | conv_dim/fc_dim: the dimension of the conv/fc layers 29 | norm: normalization for the conv layers 30 | """ 31 | super().__init__() 32 | 33 | # fmt: off 34 | num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV 35 | conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM 36 | num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC 37 | fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM 38 | norm = cfg.MODEL.ROI_BOX_HEAD.NORM 39 | # fmt: on 40 | assert num_conv + num_fc > 0 41 | 42 | self._output_size = (input_shape.channels, input_shape.height, input_shape.width) 43 | 44 | self.conv_norm_relus = [] 45 | for k in range(num_conv): 46 | conv = Conv2d( 47 | self._output_size[0], 48 | conv_dim, 49 | kernel_size=3, 50 | padding=1, 51 | bias=not norm, 52 | norm=get_norm(norm, conv_dim), 53 | activation=F.relu, 54 | ) 55 | self.add_module("conv{}".format(k + 1), conv) 56 | self.conv_norm_relus.append(conv) 57 | self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) 58 | 59 | self.fcs = [] 60 | for k in range(num_fc): 61 | fc = nn.Linear(np.prod(self._output_size), fc_dim) 62 | self.add_module("fc{}".format(k + 1), fc) 63 | self.fcs.append(fc) 64 | self._output_size = fc_dim 65 | 66 | for layer in self.conv_norm_relus: 67 | weight_init.c2_msra_fill(layer) 68 | for layer in self.fcs: 69 | weight_init.c2_xavier_fill(layer) 70 | 71 | def forward(self, x): 72 | for layer in self.conv_norm_relus: 73 | x = layer(x) 74 | if len(self.fcs): 75 | if x.dim() > 2: 76 | x = torch.flatten(x, start_dim=1) 77 | for layer in self.fcs: 78 | x = F.relu(layer(x)) 79 | return x 80 | 81 | @property 82 | def output_size(self): 83 | return self._output_size 84 | 85 | 86 | def build_box_head(cfg, input_shape): 87 | """ 88 | Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`. 89 | """ 90 | name = cfg.MODEL.ROI_BOX_HEAD.NAME 91 | return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape) 92 | -------------------------------------------------------------------------------- /detectron2/modeling/sampling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | 4 | 5 | def subsample_labels(labels, num_samples, positive_fraction, bg_label): 6 | """ 7 | Return `num_samples` random samples from `labels`, with a fraction of 8 | positives no larger than `positive_fraction`. 9 | 10 | Args: 11 | labels (Tensor): (N, ) label vector with values: 12 | -1: ignore 13 | bg_label: background ("negative") class 14 | otherwise: one or more foreground ("positive") classes 15 | num_samples (int): The total number of labels with value >= 0 to return. 16 | Values that are not sampled will be filled with -1 (ignore). 17 | positive_fraction (float): The number of subsampled labels with values > 0 18 | is `min(num_positives, int(positive_fraction * num_samples))`. The number 19 | of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. 20 | In other words, if there are not enough positives, the sample is filled with 21 | negatives. If there are also not enough negatives, then as many elements are 22 | sampled as is possible. 23 | bg_label (int): label index of background ("negative") class. 24 | 25 | Returns: 26 | pos_idx, neg_idx (Tensor): 1D indices. The total number of indices is `num_samples` 27 | if possible.
The fraction of positive indices is `positive_fraction` if possible. 28 | """ 29 | positive = torch.nonzero((labels != -1) & (labels != bg_label)).squeeze(1) 30 | negative = torch.nonzero(labels == bg_label).squeeze(1) 31 | 32 | num_pos = int(num_samples * positive_fraction) 33 | # protect against not enough positive examples 34 | num_pos = min(positive.numel(), num_pos) 35 | num_neg = num_samples - num_pos 36 | # protect against not enough negative examples 37 | num_neg = min(negative.numel(), num_neg) 38 | 39 | # randomly select positive and negative examples 40 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 41 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 42 | 43 | pos_idx = positive[perm1] 44 | neg_idx = negative[perm2] 45 | return pos_idx, neg_idx 46 | -------------------------------------------------------------------------------- /detectron2/modeling/weaklygrounding/loss_kac.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | from detectron2.layers.numerical_stability_softmax import numerical_stability_softmax 5 | from detectron2.config import global_cfg as cfg 6 | from fvcore.nn import sigmoid_focal_loss_jit, smooth_l1_loss 7 | from detectron2.structures import Boxes 8 | from detectron2.utils.events import get_event_storage 9 | from detectron2.layers.weighted_smooth_l1_loss import smooth_l1_loss as weighted_smooth_l1_loss 10 | 11 | 12 | 13 | class WeaklyVGLossCompute(): 14 | def __init__(self): 15 | self.cross_entropy = torch.nn.CrossEntropyLoss(reduction='mean') 16 | self.device = torch.device('cuda') 17 | self.reg_lambda = cfg.MODEL.VG.REG_LOSS_FACTOR 18 | 19 | def __call__(self, batch_phrase_mask, batch_decode_logits, batch_phrase_dec_ids, batch_pred_delta, batch_gt_delta, batch_pred_similarity): 20 | 21 | noun_reconst_loss = torch.zeros(1).to(self.device) 22 | visual_consistency_loss = torch.zeros(1).to(self.device) 23 | 24 | 25 | for (phr_mask, decode_logits, phrase_dec_ids, pred_delta, det_sim, gt_delta) in zip(batch_phrase_mask, batch_decode_logits, batch_phrase_dec_ids, batch_pred_delta, batch_pred_similarity, batch_gt_delta): 26 | 27 | 28 | ## here we ignore the first word reconstruction 29 | phrase_dec_ids = torch.as_tensor(phrase_dec_ids).long().to(self.device) 30 | vx, vy = (phr_mask > 0).nonzero().transpose(0, 1) 31 | noun_reconst_loss += self.cross_entropy(decode_logits[vx, vy], phrase_dec_ids[vx, vy]) 32 | 33 | """ 34 | pred_delta: np*nb*4 35 | gt_delta: nb*4 36 | det_sim: np*nb 37 | """ 38 | 39 | np, nb = det_sim.shape # np: number of phrases, nb: number of boxes 40 | pred_delta = pred_delta.reshape(-1, 4) 41 | gt_delta = gt_delta.unsqueeze(0).repeat(np, 1, 1).reshape(-1, 4) 42 | gt_delta = gt_delta - 0.5 43 | n = torch.abs(pred_delta - gt_delta) 44 | cond = n < 1 45 | loss = torch.where(cond, 0.5 * n ** 2, n - 0.5).mean(1) # smooth L1 with beta=1, averaged over the 4 box coordinates 46 | vc_loss = det_sim.reshape(-1) * loss 47 | visual_consistency_loss += self.reg_lambda * vc_loss.sum() 48 | 49 | return noun_reconst_loss, visual_consistency_loss 50 | -------------------------------------------------------------------------------- /detectron2/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from .build import build_lr_scheduler, build_optimizer 3 | from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /detectron2/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .boxes import Boxes, BoxMode, pairwise_iou 3 | from .image_list import ImageList 4 | from .instances import Instances 5 | from .keypoints import Keypoints, heatmaps_to_keypoints 6 | from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box 7 | from .rotated_boxes import RotatedBoxes 8 | from .rotated_boxes import pairwise_iou as pairwise_iou_rotated 9 | 10 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 11 | -------------------------------------------------------------------------------- /detectron2/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /detectron2/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | -------------------------------------------------------------------------------- /detectron2/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | import os 3 | import sys 4 | from collections import defaultdict 5 | import PIL 6 | import torch 7 | from tabulate import tabulate 8 | 9 | __all__ = ["collect_env_info"] 10 | 11 | 12 | def collect_torch_env(): 13 | try: 14 | import torch.__config__ 15 | 16 | return torch.__config__.show() 17 | except ImportError: 18 | # compatible with older versions of pytorch 19 | from torch.utils.collect_env import get_pretty_env_info 20 | 21 | return get_pretty_env_info() 22 | 23 | 24 | def get_env_module(): 25 | var_name = "DETECTRON2_ENV_MODULE" 26 | return var_name, os.environ.get(var_name, "") 27 | 28 | 29 | def collect_env_info(): 30 | data = [] 31 | data.append(("Python", sys.version.replace("\n", ""))) 32 | try: 33 | from detectron2 import _C 34 | except ImportError: 35 | pass 36 | else: 37 | data.append(("Detectron2 Compiler", _C.get_compiler_version())) 38 | 39 | data.append(get_env_module()) 40 | data.append(("PyTorch", torch.__version__)) 41 | data.append(("PyTorch Debug Build", torch.version.debug)) 42 | 43 | has_cuda = torch.cuda.is_available() 44 | data.append(("CUDA available", has_cuda)) 45 | if has_cuda: 46 | devices = defaultdict(list) 47 | for k in range(torch.cuda.device_count()): 48 | devices[torch.cuda.get_device_name(k)].append(str(k)) 49 | for name, devids in devices.items(): 50 | data.append(("GPU " + ",".join(devids), name)) 51 | data.append(("Pillow", PIL.__version__)) 52 | 53 | try: 54 | import cv2 55 | 56 | data.append(("cv2", cv2.__version__)) 57 | except ImportError: 58 | pass 59 | env_str = tabulate(data) + "\n" 60 | env_str += collect_torch_env() 61 | return env_str 62 | 63 | 64 | if __name__ == "__main__": 65 | print(collect_env_info()) 66 | -------------------------------------------------------------------------------- /detectron2/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | class Registry(object): 3 | """ 4 | The registry that provides name -> object mapping, to support third-party users' custom modules. 5 | 6 | To create a registry (inside detectron2): 7 | 8 | .. code-block:: python 9 | 10 | BACKBONE_REGISTRY = Registry('BACKBONE') 11 | 12 | To register an object: 13 | 14 | .. code-block:: python 15 | 16 | @BACKBONE_REGISTRY.register() 17 | class MyBackbone(): 18 | ... 19 | 20 | Or: 21 | 22 | BACKBONE_REGISTRY.register(obj=MyBackbone) 23 | """ 24 | 25 | def __init__(self, name): 26 | """ 27 | Args: 28 | name (str): the name of this registry 29 | """ 30 | self._name = name 31 | 32 | self._obj_map = {} 33 | 34 | def _do_register(self, name, obj): 35 | assert ( 36 | name not in self._obj_map 37 | ), "An object named '{}' was already registered in '{}' registry!".format(name, self._name) 38 | self._obj_map[name] = obj 39 | 40 | def register(self, obj=None): 41 | """ 42 | Register the given object under the name `obj.__name__`. 43 | Can be used as either a decorator or not. See docstring of this class for usage.
44 | """ 45 | if obj is None: 46 | # used as a decorator 47 | def deco(func_or_class): 48 | name = func_or_class.__name__ 49 | self._do_register(name, func_or_class) 50 | return func_or_class 51 | 52 | return deco 53 | 54 | # used as a function call 55 | name = obj.__name__ 56 | self._do_register(name, obj) 57 | 58 | def get(self, name): 59 | ret = self._obj_map.get(name) 60 | if ret is None: 61 | raise KeyError("No object named '{}' found in '{}' registry!".format(name, self._name)) 62 | return ret 63 | -------------------------------------------------------------------------------- /detectron2/utils/serialize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import cloudpickle 3 | 4 | 5 | class PicklableWrapper(object): 6 | """ 7 | Wrap an object to make it more picklable. Note that it uses 8 | heavyweight serialization libraries that are slower than pickle. 9 | It's best to use it only on closures (which are usually not picklable). 10 | 11 | This is a simplified version of 12 | https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py 13 | """ 14 | 15 | def __init__(self, obj): 16 | self._obj = obj 17 | 18 | def __reduce__(self): 19 | s = cloudpickle.dumps(self._obj) 20 | return cloudpickle.loads, (s,) 21 | 22 | def __call__(self, *args, **kwargs): 23 | return self._obj(*args, **kwargs) 24 | 25 | def __getattr__(self, attr): 26 | # Ensure that the wrapped object can be used seamlessly as the previous object. 27 | if attr not in ["_obj"]: 28 | return getattr(self._obj, attr) 29 | return getattr(self, attr) 30 | -------------------------------------------------------------------------------- /dev/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Some scripts for developers to use, including: 3 | 4 | - `linter.sh`: lint the codebase before commit 5 | - `run_{inference,instant}_tests.sh`: run inference/training for a few iterations. 6 | - `parse_results.sh`: parse results from a log file. 7 | -------------------------------------------------------------------------------- /dev/linter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ev 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # Run this script at project root by "./dev/linter.sh" before you commit 5 | 6 | echo "Running isort ..." 7 | isort -y --multi-line 3 --trailing-comma -sp . --skip datasets --skip docs --skip-glob '*/__init__.py' --atomic 8 | 9 | echo "Running black ..." 10 | black -l 100 . 11 | 12 | echo "Running flake8 ..." 13 | if [ -x "$(command -v flake8-3)" ]; then 14 | flake8-3 . 15 | else 16 | python3 -m flake8 . 17 | fi 18 | 19 | # echo "Running mypy ..." 20 | # Pytorch does not have enough type annotations 21 | # mypy detectron2/solver detectron2/structures detectron2/config 22 | 23 | echo "Running clang-format ..." 24 | find . -regex ".*\.\(cpp\|c\|cc\|cu\|cxx\|h\|hh\|hpp\|hxx\|tcc\|mm\|m\)" -print0 | xargs -0 clang-format -i 25 | 26 | command -v arc > /dev/null && arc lint 27 | -------------------------------------------------------------------------------- /dev/parse_results.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # A shell script that parses metrics from the log file.
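# Example invocation (with a hypothetical log path): ./dev/parse_results.sh ./output/log.txt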
5 | # Make it easier for developers to track performance of models. 6 | 7 | LOG="$1" 8 | 9 | if [[ -z "$LOG" ]]; then 10 | echo "Usage: $0 /path/to/log/file" 11 | exit 1 12 | fi 13 | 14 | # [12/15 11:47:32] trainer INFO: Total training time: 12:15:04.446477 (0.4900 s / it) 15 | # [12/15 11:49:03] inference INFO: Total inference time: 0:01:25.326167 (0.13652186737060548 s / img per device, on 8 devices) 16 | 17 | # training time 18 | trainspeed=$(grep -o 'Overall training.*' "$LOG" | grep -Eo '\(.*\)' | grep -o '[0-9\.]*') 19 | echo "Training speed: $trainspeed s/it" 20 | 21 | # inference time: there could be multiple inference during training 22 | inferencespeed=$(grep -o 'Total inference.*' "$LOG" | tail -n1 | grep -Eo '\(.*\)' | grep -o '[0-9\.]*' | head -n1) 23 | echo "Inference speed: $inferencespeed s/it" 24 | 25 | # [12/15 11:47:18] trainer INFO: eta: 0:00:00 iter: 90000 loss: 0.5407 (0.7256) loss_classifier: 0.1744 (0.2446) loss_box_reg: 0.0838 (0.1160) loss_mask: 0.2159 (0.2722) loss_objectness: 0.0244 (0.0429) loss_rpn_box_reg: 0.0279 (0.0500) time: 0.4487 (0.4899) data: 0.0076 (0.0975) lr: 0.000200 max mem: 4161 26 | memory=$(grep -o 'max[_ ]mem: [0-9]*' "$LOG" | tail -n1 | grep -o '[0-9]*') 27 | echo "Training memory: $memory MB" 28 | 29 | echo "Easy to copypaste:" 30 | echo "$trainspeed","$inferencespeed","$memory" 31 | 32 | echo "------------------------------" 33 | 34 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: bbox 35 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl 36 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0017,0.0024,0.0017,0.0005,0.0019,0.0011 37 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: segm 38 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl 39 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0014,0.0021,0.0016,0.0005,0.0016,0.0011 40 | 41 | echo "COCO Results:" 42 | num_tasks=$(grep -o 'copypaste:.*Task.*' "$LOG" | sort -u | wc -l) 43 | # each task has 3 lines 44 | grep -o 'copypaste:.*' "$LOG" | cut -d ' ' -f 2- | tail -n $((num_tasks * 3)) 45 | -------------------------------------------------------------------------------- /dev/run_inference_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | BIN="python tools/train_net.py" 5 | OUTPUT="inference_test_output" 6 | NUM_GPUS=2 7 | 8 | CFG_LIST=( "${@:1}" ) 9 | 10 | if [ ${#CFG_LIST[@]} -eq 0 ]; then 11 | CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml ) 12 | fi 13 | 14 | echo "========================================================================" 15 | echo "Configs to run:" 16 | echo "${CFG_LIST[@]}" 17 | echo "========================================================================" 18 | 19 | 20 | for cfg in "${CFG_LIST[@]}"; do 21 | echo "========================================================================" 22 | echo "Running $cfg ..." 23 | echo "========================================================================" 24 | $BIN \ 25 | --eval-only \ 26 | --num-gpus $NUM_GPUS \ 27 | --config-file "$cfg" \ 28 | OUTPUT_DIR $OUTPUT 29 | rm -rf $OUTPUT 30 | done 31 | 32 | 33 | echo "========================================================================" 34 | echo "Running demo.py ..." 
35 | echo "========================================================================" 36 | DEMO_BIN="python demo/demo.py" 37 | COCO_DIR=datasets/coco/val2014 38 | mkdir -pv $OUTPUT 39 | 40 | set -v 41 | 42 | $DEMO_BIN --config-file ./configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml \ 43 | --input $COCO_DIR/COCO_val2014_0000001933* --output $OUTPUT 44 | rm -rf $OUTPUT 45 | -------------------------------------------------------------------------------- /dev/run_instant_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | BIN="python tools/train_net.py" 5 | OUTPUT="instant_test_output" 6 | NUM_GPUS=2 7 | 8 | CFG_LIST=( "${@:1}" ) 9 | if [ ${#CFG_LIST[@]} -eq 0 ]; then 10 | CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml ) 11 | fi 12 | 13 | echo "========================================================================" 14 | echo "Configs to run:" 15 | echo "${CFG_LIST[@]}" 16 | echo "========================================================================" 17 | 18 | for cfg in "${CFG_LIST[@]}"; do 19 | echo "========================================================================" 20 | echo "Running $cfg ..." 21 | echo "========================================================================" 22 | $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \ 23 | SOLVER.IMS_PER_BATCH $(($NUM_GPUS * 2)) \ 24 | OUTPUT_DIR "$OUTPUT" 25 | rm -rf "$OUTPUT" 26 | done 27 | 28 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 20 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Build the docs: 3 | 4 | 1. Install dependencies in `requirements.txt` 5 | 2. `make html` 6 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. detectron2 documentation master file, created by 2 | sphinx-quickstart on Sat Sep 21 13:46:45 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to detectron2's documentation! 7 | ====================================== 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | 12 | tutorials/index 13 | notes/index 14 | modules/index 15 | -------------------------------------------------------------------------------- /docs/modules/checkpoint.rst: -------------------------------------------------------------------------------- 1 | detectron2.checkpoint package 2 | ============================= 3 | 4 | .. automodule:: detectron2.checkpoint 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/config.rst: -------------------------------------------------------------------------------- 1 | detectron2.config package 2 | ========================= 3 | 4 | .. automodule:: detectron2.config 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/data.datasets.rst: -------------------------------------------------------------------------------- 1 | detectron2.data.datasets package 2 | ================================ 3 | 4 | .. automodule:: detectron2.data.datasets 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/data.rst: -------------------------------------------------------------------------------- 1 | detectron2.data package 2 | ======================= 3 | 4 | .. automodule:: detectron2.data 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | detectron2.data.detection\_utils module 10 | --------------------------------------- 11 | 12 | .. automodule:: detectron2.data.detection_utils 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | Subpackages 18 | ----------- 19 | 20 | .. toctree:: 21 | 22 | data.datasets 23 | data.samplers 24 | data.transforms 25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/modules/data.samplers.rst: -------------------------------------------------------------------------------- 1 | detectron2.data.samplers package 2 | ================================ 3 | 4 | .. automodule:: detectron2.data.samplers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/data.transforms.rst: -------------------------------------------------------------------------------- 1 | detectron2.data.transforms package 2 | ================================== 3 | 4 | .. automodule:: detectron2.data.transforms 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/engine.rst: -------------------------------------------------------------------------------- 1 | detectron2.engine package 2 | ========================= 3 | 4 | 5 | .. automodule:: detectron2.engine 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | 11 | detectron2.engine.defaults module 12 | --------------------------------- 13 | 14 | .. automodule:: detectron2.engine.defaults 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | 19 | detectron2.engine.hooks module 20 | --------------------------------- 21 | 22 | .. 
automodule:: detectron2.engine.hooks 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | -------------------------------------------------------------------------------- /docs/modules/evaluation.rst: -------------------------------------------------------------------------------- 1 | detectron2.evaluation package 2 | ============================= 3 | 4 | .. automodule:: detectron2.evaluation 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/index.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ================== 3 | 4 | .. toctree:: 5 | 6 | checkpoint 7 | config 8 | data 9 | engine 10 | evaluation 11 | layers 12 | modeling 13 | solver 14 | structures 15 | utils 16 | -------------------------------------------------------------------------------- /docs/modules/layers.rst: -------------------------------------------------------------------------------- 1 | detectron2.layers package 2 | ========================= 3 | 4 | .. automodule:: detectron2.layers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/modeling.rst: -------------------------------------------------------------------------------- 1 | detectron2.modeling package 2 | =========================== 3 | 4 | .. automodule:: detectron2.modeling 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/solver.rst: -------------------------------------------------------------------------------- 1 | detectron2.solver package 2 | ========================= 3 | 4 | .. automodule:: detectron2.solver 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/structures.rst: -------------------------------------------------------------------------------- 1 | detectron2.structures package 2 | ============================= 3 | 4 | .. automodule:: detectron2.structures 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/modules/utils.rst: -------------------------------------------------------------------------------- 1 | detectron2.utils package 2 | ======================== 3 | 4 | detectron2.utils.colormap module 5 | -------------------------------- 6 | 7 | .. automodule:: detectron2.utils.colormap 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | detectron2.utils.comm module 13 | ---------------------------- 14 | 15 | .. automodule:: detectron2.utils.comm 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | 21 | detectron2.utils.events module 22 | ------------------------------ 23 | 24 | .. automodule:: detectron2.utils.events 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | 30 | detectron2.utils.logger module 31 | ------------------------------ 32 | 33 | .. automodule:: detectron2.utils.logger 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | 39 | detectron2.utils.registry module 40 | -------------------------------- 41 | 42 | .. 
automodule:: detectron2.utils.registry 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | detectron2.utils.video\_visualizer module 49 | ----------------------------------------- 50 | 51 | .. automodule:: detectron2.utils.video_visualizer 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | 56 | detectron2.utils.visualizer module 57 | ---------------------------------- 58 | 59 | .. automodule:: detectron2.utils.visualizer 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | 64 | -------------------------------------------------------------------------------- /docs/notes/changelog.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | 4 | ### Major Changes in Code 5 | 6 | * 2019-10-10: initial release. 7 | 8 | 9 | ### Config Version Change Log 10 | 11 | * v1: Rename `RPN_HEAD.NAME` to `RPN.HEAD_NAME`. 12 | * v2: A batch of renames of many configurations before release. 13 | -------------------------------------------------------------------------------- /docs/notes/index.rst: -------------------------------------------------------------------------------- 1 | Notes 2 | ====================================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | benchmarks 8 | compatibility 9 | changelog 10 | -------------------------------------------------------------------------------- /docs/tutorials/configs.md: -------------------------------------------------------------------------------- 1 | # Using Configs 2 | 3 | Detectron2's config system uses yaml and [yacs](https://github.com/rbgirshick/yacs). 4 | In addition to the basic operations that access and update a config, we provide 5 | the following extra functionalities: 6 | 7 | 1. The config can have a `_BASE_: base.yaml` field, which will load a base config first. 8 | Values in the base config will be overwritten in sub-configs, if there are any conflicts. 9 | We provide several base configs for standard model architectures. 10 | 2. We provide config versioning, for backward compatibility. 11 | If your config file is versioned with a config line like `VERSION: 2`, 12 | detectron2 will still recognize it even if we rename some keys in the future. 13 | 14 | ### Best Practice with Configs 15 | 16 | 1. Treat the configs you write as "code": avoid copying them or duplicating them; use "_BASE_" 17 | instead to share common parts between configs. 18 | 19 | 2. Keep the configs you write simple: don't include keys that do not affect the experimental setting. 20 | 21 | 3. Keep a version number in your configs (or the base config), e.g., `VERSION: 2`, 22 | for backward compatibility. 23 | 24 | 4. Save a full config together with a trained model, and use it to run inference. 25 | This is more robust to changes that may happen to the config definition 26 | (e.g., if a default value changed). 27 | -------------------------------------------------------------------------------- /docs/tutorials/extend.md: -------------------------------------------------------------------------------- 1 | # Extend Detectron2's Defaults 2 | 3 | __Research is about doing things in new ways__. 4 | This brings a tension in how to create abstractions in code, 5 | which is a challenge for any research engineering project of a significant size: 6 | 7 | 1. On one hand, it needs to have very thin abstractions to allow for the possibility of doing 8 | everything in new ways. It should be reasonably easy to break existing 9 | abstractions and replace them with new ones.
10 | 11 | 2. On the other hand, such a project also needs reasonably high-level 12 | abstractions, so that users can easily do things in standard ways, 13 | without worrying too much about the details that only certain researchers care about. 14 | 15 | In detectron2, there are two types of interfaces that address this tension together: 16 | 17 | 1. Functions and classes that take only a "config" argument (optionally with a minimal 18 | set of extra arguments in cases of mature interfaces). 19 | 20 | Such functions and classes implement 21 | the "standard default" behavior: it will read what it needs from the 22 | config and do the "standard" thing. 23 | Users only need to load a standard config and pass it around, without having to worry about 24 | which arguments are used and what they all mean. 25 | 26 | 2. Functions and classes that have well-defined explicit arguments. 27 | 28 | Each of these is a small building block of the entire system. 29 | They require users' effort to stitch together, but can be stitched together in more flexible ways. 30 | When you need to implement something different from the "standard defaults" 31 | included in detectron2, these well-defined components can be reused. 32 | 33 | 34 | If you only need the standard behavior, the [Beginner's Tutorial](getting_started) 35 | should suffice. If you need to extend detectron2 to your own needs, 36 | see the following tutorials for more details: 37 | 38 | * Detectron2 includes a few standard datasets, but you can use custom ones. See 39 | [Use Custom Datasets](datasets). 40 | * Detectron2 contains the standard logic that creates a data loader from a 41 | dataset, but you can write your own as well. See [Use Custom Data Loaders](data_loading). 42 | * Detectron2 implements many standard detection models, and provide ways for you 43 | to overwrite its behaviors. See [Writing Models](models). 44 | * Detectron2 provides a default training loop that is good for common training tasks. 45 | You can customize it with hooks, or write your own loop instead. See [training](training). 46 | -------------------------------------------------------------------------------- /docs/tutorials/getting_started.md: -------------------------------------------------------------------------------- 1 | 2 | ## Getting Started with Detectron2 3 | 4 | This document provides a brief intro of the usage of builtin command-line tools in detectron2. 5 | 6 | For a tutorial that involves actual coding with the API, 7 | see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) 8 | which covers how to run inference with an 9 | existing model, and how to train a builtin model on a custom dataset. 10 | 11 | For more advanced tutorials, refer to our [documentation](https://detectron2.readthedocs.io/tutorials/extend.html). 12 | 13 | 14 | ### Inference with Pre-trained Models 15 | 16 | 1. Pick a model and its config file from 17 | [model zoo](https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md), 18 | for example, `mask_rcnn_R_50_FPN_3x.yaml`. 19 | 2. Run the demo with 20 | ``` 21 | python demo/demo.py --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ 22 | --input input1.jpg input2.jpg \ 23 | --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl 24 | ``` 25 | It will run the inference and show visualizations in an OpenCV window. 26 | 27 | * To run __on your webcam__, replace `--input files` with `--webcam`. 
28 | * To run __on a video__, replace `--input files` with `--video-input video.mp4`. 29 | * To save outputs to a directory (for images) or a file (for webcam or video), use `--output`. 30 | 31 | 32 | ### Train a Standard Model 33 | 34 | We provide a script, "tools/train_net.py", that is made to train 35 | all the configs provided in detectron2. 36 | You may want to use it as a reference to write your own training script for new research. 37 | 38 | To train a model with "train_net.py", first 39 | set up the corresponding datasets following 40 | [datasets/README.md](https://github.com/facebookresearch/detectron2/blob/master/datasets/README.md), 41 | then run: 42 | ``` 43 | python tools/train_net.py --num-gpus 8 \ 44 | --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml 45 | ``` 46 | 47 | The configs are made for 8-GPU training. To train on 1 GPU, use: 48 | ``` 49 | python tools/train_net.py \ 50 | --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ 51 | SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 52 | ``` 53 | 54 | For most models, CPU training is not supported. 55 | 56 | (Note that we applied the [linear learning rate scaling rule](https://arxiv.org/abs/1706.02677) 57 | when changing the batch size.) 58 | 59 | To evaluate a model's performance, use `train_net.py --eval-only`. 60 | For more options, see `python tools/train_net.py -h`. 61 | 62 | ### Use Detectron2 in Your Code 63 | 64 | See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) 65 | to learn how to use detectron2 APIs to: 66 | 1. run inference with an existing model 67 | 2. train a builtin model on a custom dataset 68 | 69 | See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/master/projects) 70 | for more ways to build your project on detectron2. 71 | -------------------------------------------------------------------------------- /docs/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ====================================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | install 8 | getting_started 9 | extend 10 | datasets 11 | data_loading 12 | models 13 | training 14 | configs 15 | -------------------------------------------------------------------------------- /docs/tutorials/install.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | Our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) also has step-by-step instructions that install detectron2. 4 | 5 | ### Requirements 6 | - Python >= 3.6 7 | - PyTorch 1.3 8 | - [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. 9 | You can install them together at [pytorch.org](http://pytorch.org) to make sure of this.
10 | - OpenCV, needed by demo and visualization 11 | - [fvcore](https://github.com/facebookresearch/fvcore/): `pip install 'git+https://github.com/facebookresearch/fvcore'` 12 | - pycocotools: `pip install cython; pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'` 13 | - GCC >= 4.9 14 | - apex 15 | git clone https://www.github.com/nvidia/apex 16 | cd apex 17 | python setup.py install 18 | 19 | 20 | ### Build detectron2 21 | 22 | After having the above dependencies, run: 23 | ``` 24 | git clone git@github.com:facebookresearch/detectron2.git 25 | cd detectron2 26 | export TORCH_CUDA_ARCH_LIST="Kepler;Kepler+Tesla;Maxwell;Maxwell+Tegra;Pascal;Volta;Turing" ## compile for every platform 27 | python setup.py build develop 28 | 29 | # or if you are on macOS 30 | # MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop 31 | 32 | # or, as an alternative to `setup.py`, do 33 | # pip install . 34 | ``` 35 | Note: you may need to rebuild detectron2 after reinstalling a different build of PyTorch. 36 | 37 | ### Common Installation Issues 38 | 39 | + Undefined torch/aten symbols, or segmentation fault immediately when running the library. 40 | This may mean one of the two: 41 | 42 | * detectron2 or torchvision is not compiled with the version of PyTorch you're running. 43 | 44 | If you use a pre-built torchvision, uninstall torchvision & pytorch, and reinstall them 45 | following [pytorch.org](http://pytorch.org). 46 | If you manually build detectron2 or torchvision, remove the files you built (`build/`, `**/*.so`) 47 | and rebuild them. 48 | 49 | * detectron2 or torchvision is not compiled using gcc >= 4.9. 50 | 51 | You'll see a warning message during compilation in this case. Please remove the files you build, 52 | and rebuild them. 53 | Technically, you need the identical compiler that's used to build pytorch to guarantee 54 | compatibility. But in practice, gcc >= 4.9 should work OK. 55 | 56 | + Undefined cuda symbols. The version of NVCC you use to build detectron2 or torchvision does 57 | not match the version of cuda you are running with. 58 | This happens sometimes when using anaconda. 59 | 60 | -------------------------------------------------------------------------------- /docs/tutorials/models.md: -------------------------------------------------------------------------------- 1 | # Using and Writing Models 2 | 3 | Models (and their sub-models) in detectron2 are built by 4 | functions such as `build_model`, `build_backbone`, `build_roi_heads`: 5 | ```python 6 | from detectron2.modeling import build_model 7 | model = build_model(cfg) # returns a torch.nn.Module 8 | ``` 9 | 10 | In some cases, e.g. if you are trying to do something completely new, you may wish to implement 11 | a model entirely from scratch within detectron2. However, in many situations you may 12 | be interested in modifying or extending some components of an existing model. 13 | Therefore, we also provide a registration mechanism that lets you override the 14 | behavior of certain internal components of standard models. 15 | 16 | For example, to add a new backbone, import this code: 17 | ```python 18 | from detectron2.modeling import BACKBONE_REGISTRY, Backbone 19 | @BACKBONE_REGISTRY.register() 20 | class NewBackBone(Backbone): 21 | def __init__(self, cfg, input_shape): 22 | # create your own backbone 23 | ``` 24 | which will allow you to use `cfg.MODEL.BACKBONE.NAME = 'NewBackBone'` in your config file. 
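To make the lookup side of the registration concrete, here is a minimal sketch (not taken from this repository) of how the registered name is resolved when the backbone is built. It assumes the `NewBackBone` class above has already been imported, so that its `@BACKBONE_REGISTRY.register()` decorator has run:

```python
from detectron2.config import get_cfg
from detectron2.modeling import build_backbone

cfg = get_cfg()
cfg.MODEL.BACKBONE.NAME = "NewBackBone"  # same effect as setting it in a yaml config

# build_backbone looks "NewBackBone" up in BACKBONE_REGISTRY and calls
# NewBackBone(cfg, input_shape) with a default ShapeSpec input shape.
backbone = build_backbone(cfg)
```

Note that for the returned `backbone` to be usable by downstream components, `NewBackBone` must implement the `Backbone` interface (in particular `forward` and `output_shape`).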
25 | 
26 | As another example, to add new abilities to the ROI heads in the Generalized R-CNN meta-architecture,
27 | you can implement a new
28 | [ROIHeads](../modules/modeling.html#detectron2.modeling.ROIHeads) subclass and register it in the `ROI_HEADS_REGISTRY`.
29 | See [densepose in detectron2](https://github.com/facebookresearch/detectron2/tree/master/projects/DensePose)
30 | for an example.
31 | 
32 | Other registries can be found in the [API documentation](../modules/modeling.html).
-------------------------------------------------------------------------------- /docs/tutorials/training.md: --------------------------------------------------------------------------------
1 | # Training
2 | 
3 | From the previous tutorials, you may now have a custom model and data loader.
4 | 
5 | You are free to create your own optimizer and write the training logic: it's
6 | usually easy with PyTorch, and it allows researchers to see the entire training
7 | logic more clearly.
8 | 
9 | We also provide a standardized "trainer" abstraction with a
10 | [minimal hook system](../modules/engine.html#detectron2.engine.HookBase)
11 | that helps simplify the standard types of training.
12 | 
13 | You can use
14 | [SimpleTrainer().train()](../modules/engine.html#detectron2.engine.SimpleTrainer),
15 | which does single-cost, single-optimizer, single-data-source training.
16 | Or use [DefaultTrainer().train()](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer),
17 | which includes more standard behavior that one might want to opt into.
-------------------------------------------------------------------------------- /killpy.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | ps -ef | grep python | grep -v grep | cut -c 9-15 | xargs kill -9
-------------------------------------------------------------------------------- /outputs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/youngfly11/ReIR-WeaklyGrounding.pytorch/2a962c335541c981149a042794ee508e0e7226f4/outputs/.DS_Store
-------------------------------------------------------------------------------- /projects/DensePose/README.md: --------------------------------------------------------------------------------
1 | 
2 | # DensePose in Detectron2
3 | **Dense Human Pose Estimation In The Wild**
4 | 
5 | _Rıza Alp Güler, Natalia Neverova, Iasonas Kokkinos_
6 | 
7 | [[`densepose.org`](https://densepose.org)] [[`arXiv`](https://arxiv.org/abs/1802.00434)] [[`BibTeX`](#CitingDensePose)]
8 | 
9 | Dense human pose estimation aims at mapping all human pixels of an RGB image to the 3D surface of the human body.
10 | 
11 | 
12 | 13 |
14 | 
15 | In this repository, we provide the code to train and evaluate DensePose-RCNN. We also provide tools to visualize
16 | DensePose annotations and results.
17 | 
18 | ## Training
19 | 
20 | To train a model, one can call
21 | ```bash
22 | python /path/to/detectron2/projects/DensePose/train_net.py --config-file <config.yaml>
23 | ```
24 | 
25 | For example, to launch end-to-end DensePose-RCNN training with a ResNet-50 FPN backbone on a single GPU,
26 | one should execute:
27 | ```bash
28 | python /path/to/detectron2/projects/DensePose/train_net.py --config-file /path/to/detectron2/projects/DensePose/configs/densepose_R_50_FPN_s1x.yaml
29 | ```
30 | 
31 | ## Testing
32 | 
33 | Model testing can be done in the same way as training, except for an additional flag `--eval-only` and
34 | model location specification through `MODEL.WEIGHTS model.pth` in the command line:
35 | ```bash
36 | python /path/to/detectron2/projects/DensePose/train_net.py --config-file /path/to/detectron2/projects/DensePose/configs/densepose_R_50_FPN_s1x.yaml --eval-only MODEL.WEIGHTS model.pth
37 | ```
38 | 
39 | ## Tools
40 | 
41 | We provide tools which allow one to:
42 | - easily view DensePose annotated data in a dataset;
43 | - perform DensePose inference on a set of images;
44 | - visualize DensePose model results.
45 | 
46 | `query_db` is a tool to print or visualize DensePose data in a dataset.
47 | Details on this tool can be found in [`TOOL_QUERY_DB.md`](doc/TOOL_QUERY_DB.md).
48 | 
49 | `apply_net` is a tool to print or visualize DensePose results.
50 | Details on this tool can be found in [`TOOL_APPLY_NET.md`](doc/TOOL_APPLY_NET.md).
51 | 
52 | ## Citing DensePose
53 | 
54 | If you use DensePose, please use the following BibTeX entry.
55 | 
56 | ```
57 | @InProceedings{Guler2018DensePose,
58 |   title={DensePose: Dense Human Pose Estimation In The Wild},
59 |   author={R{\i}za Alp G\"uler and Natalia Neverova and Iasonas Kokkinos},
60 |   booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
61 |   year={2018}
62 | }
63 | ```
64 | 
65 | 
-------------------------------------------------------------------------------- /projects/DensePose/configs/Base-DensePose-RCNN.yaml: --------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   BACKBONE:
4 |     NAME: "build_resnet_fpn_backbone"
5 |   RESNETS:
6 |     OUT_FEATURES: ["res2", "res3", "res4", "res5"]
7 |   FPN:
8 |     IN_FEATURES: ["res2", "res3", "res4", "res5"]
9 |   ANCHOR_GENERATOR:
10 |     SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
11 |     ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
12 |   RPN:
13 |     IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
14 |     PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
15 |     PRE_NMS_TOPK_TEST: 1000  # Per FPN level
16 |     # Detectron1 uses 2000 proposals per-batch,
17 |     # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
18 |     # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | 22 | DENSEPOSE_ON: True 23 | ROI_HEADS: 24 | NAME: "DensePoseROIHeads" 25 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 26 | NUM_CLASSES: 1 27 | ROI_BOX_HEAD: 28 | NAME: "FastRCNNConvFCHead" 29 | NUM_FC: 2 30 | POOLER_RESOLUTION: 7 31 | POOLER_SAMPLING_RATIO: 2 32 | POOLER_TYPE: "ROIAlign" 33 | ROI_DENSEPOSE_HEAD: 34 | NAME: "DensePoseV1ConvXHead" 35 | POOLER_TYPE: "ROIAlign" 36 | DATASETS: 37 | TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival") 38 | TEST: ("densepose_coco_2014_minival",) 39 | SOLVER: 40 | IMS_PER_BATCH: 16 41 | BASE_LR: 0.02 42 | STEPS: (60000, 80000) 43 | MAX_ITER: 90000 44 | -------------------------------------------------------------------------------- /projects/DensePose/configs/densepose_R_50_FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DensePose-RCNN.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 4 | MODEL: 5 | WEIGHTS: "catalog://ImageNetPretrained/MSRA/R-50" 6 | SOLVER: 7 | MAX_ITER: 130000 8 | STEPS: (100000, 120000) 9 | BASE_LR: 0.002 10 | IMS_PER_BATCH: 24 11 | WARMUP_FACTOR: 0.1 12 | 13 | -------------------------------------------------------------------------------- /projects/DensePose/configs/quick_schedules/densepose_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../densepose_R_50_FPN_s1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://e2e_densepose_R_50_FPN_s1x/124238535/model_final_5f3d7f9875229310fdfe6649459c0157.pkl" 4 | DATASETS: 5 | TRAIN: () 6 | TEST: ("densepose_coco_2014_minival_100",) 7 | TEST: 8 | EXPECTED_RESULTS: [["bbox", "AP", 56.05, 0.025], ["densepose", "AP", 46.54, 0.02]] 9 | -------------------------------------------------------------------------------- /projects/DensePose/configs/quick_schedules/densepose_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-DensePose-RCNN.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DENSEPOSE_ON: True 5 | ROI_DENSEPOSE_HEAD: 6 | NAME: "DensePoseV1ConvXHead" 7 | DATASETS: 8 | TRAIN: ("densepose_coco_2014_minival_100",) 9 | TEST: ("densepose_coco_2014_minival_100",) 10 | SOLVER: 11 | MAX_ITER: 40 12 | STEPS: (30,) 13 | BASE_LR: 0.002 14 | IMS_PER_BATCH: 24 15 | -------------------------------------------------------------------------------- /projects/DensePose/configs/quick_schedules/densepose_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-DensePose-RCNN.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 4 | MODEL: 5 | WEIGHTS: "catalog://ImageNetPretrained/MSRA/R-50" 6 | DENSEPOSE_ON: True 7 | ROI_HEADS: 8 | NUM_CLASSES: 1 9 | ROI_BOX_HEAD: 10 | POOLER_RESOLUTION: 7 11 | POOLER_SAMPLING_RATIO: 2 12 | ROI_DENSEPOSE_HEAD: 13 | NAME: "DensePoseV1ConvXHead" 14 | DATASETS: 15 | TRAIN: ("densepose_coco_2014_minival",) 16 | TEST: ("densepose_coco_2014_minival",) 17 | SOLVER: 18 | MAX_ITER: 6000 19 | STEPS: (5500, 5800) 20 | BASE_LR: 0.002 21 | IMS_PER_BATCH: 24 22 | WARMUP_FACTOR: 0.1 23 | TEST: 24 | EXPECTED_RESULTS: [["bbox", "AP", 58.27, 1.0], ["densepose", "AP", 42.47, 1.5]] 25 | 26 | -------------------------------------------------------------------------------- /projects/DensePose/densepose/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from . import dataset # just to register data 3 | from .config import add_densepose_config 4 | from .dataset_mapper import DatasetMapper 5 | from .densepose_head import ROI_DENSEPOSE_HEAD_REGISTRY 6 | from .evaluator import DensePoseCOCOEvaluator 7 | from .roi_head import DensePoseROIHeads 8 | from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData 9 | -------------------------------------------------------------------------------- /projects/DensePose/densepose/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | from detectron2.config import CfgNode as CN 5 | 6 | 7 | def add_densepose_config(cfg): 8 | """ 9 | Add config for densepose head. 10 | """ 11 | _C = cfg 12 | 13 | _C.MODEL.DENSEPOSE_ON = True 14 | 15 | _C.MODEL.ROI_DENSEPOSE_HEAD = CN() 16 | _C.MODEL.ROI_DENSEPOSE_HEAD.NAME = "" 17 | _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8 18 | # Number of parts used for point labels 19 | _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24 20 | _C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4 21 | _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512 22 | _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3 23 | _C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2 24 | _C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 56 25 | _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2" 26 | _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 14 27 | _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2 28 | # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD) 29 | _C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7 30 | # Loss weights for annotation masks.(14 Parts) 31 | _C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 2.0 32 | # Loss weights for surface parts. (24 Parts) 33 | _C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 0.3 34 | # Loss weights for UV regression. 35 | _C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.1 36 | -------------------------------------------------------------------------------- /projects/DensePose/densepose/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import os 3 | 4 | from detectron2.data.datasets import register_coco_instances 5 | 6 | 7 | def get_densepose_metadata(): 8 | meta = { 9 | "thing_classes": ["person"], 10 | "densepose_transform_src": "detectron2://densepose/UV_symmetry_transforms.mat", 11 | "densepose_smpl_subdiv": "detectron2://densepose/SMPL_subdiv.mat", 12 | "densepose_smpl_subdiv_transform": "detectron2://densepose/SMPL_SUBDIV_TRANSFORM.mat", 13 | } 14 | return meta 15 | 16 | 17 | SPLITS = { 18 | "densepose_coco_2014_train": ("coco/train2014", "coco/annotations/densepose_train2014.json"), 19 | "densepose_coco_2014_minival": ("coco/val2014", "coco/annotations/densepose_minival2014.json"), 20 | "densepose_coco_2014_minival_100": ( 21 | "coco/val2014", 22 | "coco/annotations/densepose_minival2014_100.json", 23 | ), 24 | "densepose_coco_2014_valminusminival": ( 25 | "coco/val2014", 26 | "coco/annotations/densepose_valminusminival2014.json", 27 | ), 28 | } 29 | 30 | for key, (image_root, json_file) in SPLITS.items(): 31 | # Assume pre-defined datasets live in `./datasets`. 
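    # register_coco_instances() registers each split in detectron2's DatasetCatalog
    # and MetadataCatalog under the name `key`, so configs can refer to it in
    # DATASETS.TRAIN / DATASETS.TEST.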
32 |     register_coco_instances(
33 |         key,
34 |         get_densepose_metadata(),
35 |         os.path.join("datasets", json_file),
36 |         os.path.join("datasets", image_root),
37 |     )
38 | 
-------------------------------------------------------------------------------- /projects/DensePose/densepose/utils/logger.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | import logging
3 | 
4 | 
5 | def verbosity_to_level(verbosity):
6 |     if verbosity is not None:
7 |         if verbosity == 0:
8 |             return logging.WARNING
9 |         elif verbosity == 1:
10 |             return logging.INFO
11 |         elif verbosity >= 2:
12 |             return logging.DEBUG
13 |     return logging.WARNING
14 | 
-------------------------------------------------------------------------------- /projects/DensePose/densepose/vis/bounding_box.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | from .base import RectangleVisualizer, TextVisualizer
3 | 
4 | 
5 | class BoundingBoxVisualizer(object):
6 |     def __init__(self):
7 |         self.rectangle_visualizer = RectangleVisualizer()
8 | 
9 |     def visualize(self, image_bgr, boxes_xywh):
10 |         for bbox_xywh in boxes_xywh:
11 |             image_bgr = self.rectangle_visualizer.visualize(image_bgr, bbox_xywh)
12 |         return image_bgr
13 | 
14 | 
15 | class ScoredBoundingBoxVisualizer(object):
16 |     def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None):
17 |         if bbox_visualizer_params is None:
18 |             bbox_visualizer_params = {}
19 |         if score_visualizer_params is None:
20 |             score_visualizer_params = {}
21 |         self.visualizer_bbox = RectangleVisualizer(**bbox_visualizer_params)
22 |         self.visualizer_score = TextVisualizer(**score_visualizer_params)
23 | 
24 |     def visualize(self, image_bgr, scored_bboxes):
25 |         boxes_xywh, box_scores = scored_bboxes
26 |         assert len(boxes_xywh) == len(box_scores), (
27 |             "Number of bounding boxes {} should be equal to the number of "
28 |             "scores {}".format(len(boxes_xywh), len(box_scores))
29 |         )
30 |         for i, box_xywh in enumerate(boxes_xywh):
31 |             score_i = box_scores[i]
32 |             image_bgr = self.visualizer_bbox.visualize(image_bgr, box_xywh)
33 |             score_txt = "{0:6.4f}".format(score_i)
34 |             topleft_xy = box_xywh[0], box_xywh[1]
35 |             image_bgr = self.visualizer_score.visualize(image_bgr, score_txt, topleft_xy)
36 |         return image_bgr
37 | 
-------------------------------------------------------------------------------- /projects/DensePose/train_net.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | """
3 | DensePose Training Script.
4 | 
5 | This script is similar to the training script in detectron2/tools.
6 | 
7 | It is an example of how a user might use detectron2 for a new project.
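
Example (a hypothetical invocation; adjust the config path to your checkout):

    python train_net.py --num-gpus 8 \
        --config-file configs/densepose_R_50_FPN_s1x.yaml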
8 | """ 9 | 10 | import os 11 | 12 | import detectron2.utils.comm as comm 13 | from detectron2.checkpoint import DetectionCheckpointer 14 | from detectron2.config import get_cfg 15 | from detectron2.data import build_detection_test_loader, build_detection_train_loader 16 | from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch 17 | from detectron2.evaluation import COCOEvaluator, DatasetEvaluators, verify_results 18 | from detectron2.utils.logger import setup_logger 19 | 20 | from densepose import DatasetMapper, DensePoseCOCOEvaluator, add_densepose_config 21 | 22 | 23 | class Trainer(DefaultTrainer): 24 | @classmethod 25 | def build_evaluator(cls, cfg, dataset_name): 26 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") 27 | evaluators = [COCOEvaluator(dataset_name, cfg, True, output_folder)] 28 | if cfg.MODEL.DENSEPOSE_ON: 29 | evaluators.append(DensePoseCOCOEvaluator(dataset_name, True, output_folder)) 30 | return DatasetEvaluators(evaluators) 31 | 32 | @classmethod 33 | def build_test_loader(cls, cfg, dataset_name): 34 | return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False)) 35 | 36 | @classmethod 37 | def build_train_loader(cls, cfg): 38 | return build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True)) 39 | 40 | 41 | def setup(args): 42 | cfg = get_cfg() 43 | add_densepose_config(cfg) 44 | cfg.merge_from_file(args.config_file) 45 | cfg.merge_from_list(args.opts) 46 | cfg.freeze() 47 | default_setup(cfg, args) 48 | # Setup logger for "densepose" module 49 | setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="densepose") 50 | return cfg 51 | 52 | 53 | def main(args): 54 | cfg = setup(args) 55 | 56 | if args.eval_only: 57 | model = Trainer.build_model(cfg) 58 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 59 | cfg.MODEL.WEIGHTS, resume=args.resume 60 | ) 61 | res = Trainer.test(cfg, model) 62 | if comm.is_main_process(): 63 | verify_results(cfg, res) 64 | return res 65 | 66 | trainer = Trainer(cfg) 67 | trainer.resume_or_load(resume=args.resume) 68 | return trainer.train() 69 | 70 | 71 | if __name__ == "__main__": 72 | args = default_argument_parser().parse_args() 73 | print("Command Line Args:", args) 74 | launch( 75 | main, 76 | args.num_gpus, 77 | num_machines=args.num_machines, 78 | machine_rank=args.machine_rank, 79 | dist_url=args.dist_url, 80 | args=(args,), 81 | ) 82 | -------------------------------------------------------------------------------- /projects/README.md: -------------------------------------------------------------------------------- 1 | 2 | Here are a few research projects that are built on detectron2. 3 | They are examples of how to use detectron2 as a library, to make your projects more 4 | maintainable. 5 | 6 | + [DensePose: Dense Human Pose Estimation In The Wild](DensePose) 7 | + [Scale-Aware Trident Networks for Object Detection](TridentNet) 8 | + TensorMask: A Foundation for Dense Object Segmentation. (Coming Soon) 9 | + Mesh R-CNN. 
(Coming Soon)
10 | 
-------------------------------------------------------------------------------- /projects/TridentNet/README.md: --------------------------------------------------------------------------------
1 | 
2 | # TridentNet in Detectron2
3 | **Scale-Aware Trident Networks for Object Detection**
4 | 
5 | Yanghao Li\*, Yuntao Chen\*, Naiyan Wang, Zhaoxiang Zhang
6 | 
7 | [[`TridentNet`](https://github.com/TuSimple/simpledet/tree/master/models/tridentnet)] [[`arXiv`](https://arxiv.org/abs/1901.01892)] [[`BibTeX`](#CitingTridentNet)]
8 | 
9 | 
10 | 11 |
12 | 
13 | In this repository, we implement TridentNet-Fast in the Detectron2 framework. Trident Network (TridentNet) aims to generate scale-specific feature maps with a uniform representational power. We construct a parallel multi-branch architecture in which each branch shares the same transformation parameters but with different receptive fields. TridentNet-Fast is a fast approximation version of TridentNet that can achieve significant improvements without any additional parameters or computational cost.
14 | 
15 | ## Training
16 | 
17 | To train a model, one can call
18 | ```bash
19 | python /path/to/detectron2/projects/TridentNet/train_net.py --config-file <config.yaml>
20 | ```
21 | 
22 | For example, to launch end-to-end TridentNet training with a ResNet-50 backbone on 8 GPUs,
23 | one should execute:
24 | ```bash
25 | python /path/to/detectron2/projects/TridentNet/train_net.py --config-file /path/to/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml --num-gpus 8
26 | ```
27 | 
28 | ## Testing
29 | 
30 | Model testing can be done in the same way as training, except for an additional flag `--eval-only` and
31 | model location specification through `MODEL.WEIGHTS model.pth` in the command line:
32 | ```bash
33 | python /path/to/detectron2/projects/TridentNet/train_net.py --config-file /path/to/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml --eval-only MODEL.WEIGHTS model.pth
34 | ```
35 | 
36 | ## Results on MS-COCO in Detectron2
37 | 
38 | |Model|Backbone|Head|lr sched|AP|AP50|AP75|APs|APm|APl|
39 | |-----|--------|----|--------------|--|----|----|---|---|---|
40 | |Faster|R50-C4|C5-512ROI|1X|35.7|56.1|38.0|19.2|40.9|48.7|
41 | |TridentFast|R50-C4|C5-128ROI|1X|37.9|57.8|40.7|19.7|42.1|54.2|
42 | |Faster|R50-C4|C5-512ROI|3X|38.4|58.7|41.3|20.7|42.7|53.1|
43 | |TridentFast|R50-C4|C5-128ROI|3X|41.0|60.9|44.2|22.7|45.2|57.0|
44 | |Faster|R101-C4|C5-512ROI|3X|41.1|61.4|44.0|22.2|45.5|55.9|
45 | |TridentFast|R101-C4|C5-128ROI|3X|43.4|62.9|46.6|24.2|47.9|59.9|
46 | 
47 | 
48 | ## Citing TridentNet
49 | 
50 | If you use TridentNet, please use the following BibTeX entry.
51 | 
52 | ```
53 | @InProceedings{li2019scale,
54 |   title={Scale-Aware Trident Networks for Object Detection},
55 |   author={Li, Yanghao and Chen, Yuntao and Wang, Naiyan and Zhang, Zhaoxiang},
56 |   booktitle={The International Conference on Computer Vision (ICCV)},
57 |   year={2019}
58 | }
59 | ```
60 | 
61 | 
-------------------------------------------------------------------------------- /projects/TridentNet/configs/Base-TridentNet-Fast-C4.yaml: --------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   BACKBONE:
4 |     NAME: "build_trident_resnet_backbone"
5 |   ROI_HEADS:
6 |     NAME: "TridentRes5ROIHeads"
7 |     POSITIVE_FRACTION: 0.5
8 |     BATCH_SIZE_PER_IMAGE: 128
9 |     PROPOSAL_APPEND_GT: False
10 |   PROPOSAL_GENERATOR:
11 |     NAME: "TridentRPN"
12 |   RPN:
13 |     POST_NMS_TOPK_TRAIN: 500
14 |   TRIDENT:
15 |     NUM_BRANCH: 3
16 |     BRANCH_DILATIONS: [1, 2, 3]
17 |     TEST_BRANCH_IDX: 1
18 |     TRIDENT_STAGE: "res4"
19 | DATASETS:
20 |   TRAIN: ("coco_2017_train",)
21 |   TEST: ("coco_2017_val",)
22 | SOLVER:
23 |   IMS_PER_BATCH: 16
24 |   BASE_LR: 0.02
25 |   STEPS: (60000, 80000)
26 |   MAX_ITER: 90000
27 | INPUT:
28 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
29 | 
-------------------------------------------------------------------------------- /projects/TridentNet/configs/tridentnet_fast_R_101_C4_3x.yaml: --------------------------------------------------------------------------------
1 | _BASE_: "Base-TridentNet-Fast-C4.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 |   MASK_ON: False
5 |   RESNETS:
6 |     DEPTH: 101
7 | SOLVER:
8 |   STEPS: (210000, 250000)
9 |   MAX_ITER: 270000
10 | 
-------------------------------------------------------------------------------- /projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml: --------------------------------------------------------------------------------
1 | _BASE_: "Base-TridentNet-Fast-C4.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: False
5 |   RESNETS:
6 |     DEPTH: 50
7 | 
-------------------------------------------------------------------------------- /projects/TridentNet/train_net.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | """
3 | TridentNet Training Script.
4 | 
5 | This script is a simplified version of the training script in detectron2/tools.
6 | """
7 | 
8 | import os
9 | 
10 | import detectron2.utils.comm as comm
11 | from detectron2.checkpoint import DetectionCheckpointer
12 | from detectron2.config import get_cfg
13 | from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch
14 | from detectron2.evaluation import COCOEvaluator, verify_results
15 | 
16 | from tridentnet import add_tridentnet_config
17 | 
18 | 
19 | class Trainer(DefaultTrainer):
20 |     @classmethod
21 |     def build_evaluator(cls, cfg, dataset_name, output_folder=None):
22 |         if output_folder is None:
23 |             output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
24 |         return COCOEvaluator(dataset_name, cfg, True, output_folder)
25 | 
26 | 
27 | def setup(args):
28 |     """
29 |     Create configs and perform basic setups.
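
    The TridentNet-specific config keys (MODEL.TRIDENT.*) are registered by
    add_tridentnet_config() before the YAML file and the command-line overrides
    are merged in; merging a config that references unknown keys would fail
    otherwise.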
30 |     """
31 |     cfg = get_cfg()
32 |     add_tridentnet_config(cfg)
33 |     cfg.merge_from_file(args.config_file)
34 |     cfg.merge_from_list(args.opts)
35 |     cfg.freeze()
36 |     default_setup(cfg, args)
37 |     return cfg
38 | 
39 | 
40 | def main(args):
41 |     cfg = setup(args)
42 | 
43 |     if args.eval_only:
44 |         model = Trainer.build_model(cfg)
45 |         DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
46 |             cfg.MODEL.WEIGHTS, resume=args.resume
47 |         )
48 |         res = Trainer.test(cfg, model)
49 |         if comm.is_main_process():
50 |             verify_results(cfg, res)
51 |         return res
52 | 
53 |     trainer = Trainer(cfg)
54 |     trainer.resume_or_load(resume=args.resume)
55 |     return trainer.train()
56 | 
57 | 
58 | if __name__ == "__main__":
59 |     args = default_argument_parser().parse_args()
60 |     print("Command Line Args:", args)
61 |     launch(
62 |         main,
63 |         args.num_gpus,
64 |         num_machines=args.num_machines,
65 |         machine_rank=args.machine_rank,
66 |         dist_url=args.dist_url,
67 |         args=(args,),
68 |     )
69 | 
-------------------------------------------------------------------------------- /projects/TridentNet/tridentnet/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | from .config import add_tridentnet_config
3 | from .trident_backbone import (
4 |     TridentBottleneckBlock,
5 |     build_trident_resnet_backbone,
6 |     make_trident_stage,
7 | )
8 | from .trident_rpn import TridentRPN
9 | from .trident_rcnn import TridentRes5ROIHeads, TridentStandardROIHeads
10 | 
-------------------------------------------------------------------------------- /projects/TridentNet/tridentnet/config.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3 | 
4 | from detectron2.config import CfgNode as CN
5 | 
6 | 
7 | def add_tridentnet_config(cfg):
8 |     """
9 |     Add config for tridentnet.
10 |     """
11 |     _C = cfg
12 | 
13 |     _C.MODEL.TRIDENT = CN()
14 | 
15 |     # Number of branches for TridentNet.
16 |     _C.MODEL.TRIDENT.NUM_BRANCH = 3
17 |     # Specify the dilations for each branch.
18 |     _C.MODEL.TRIDENT.BRANCH_DILATIONS = [1, 2, 3]
19 |     # Specify the stage for applying trident blocks. The default stage is res4, following the
20 |     # TridentNet paper.
21 |     _C.MODEL.TRIDENT.TRIDENT_STAGE = "res4"
22 |     # Specify the test branch index for TridentNet-Fast inference:
23 |     #   - use -1 to aggregate the results of all branches during inference.
24 |     #   - otherwise, only the specified branch is used, for fast inference. The recommended
25 |     #     setting is to use the middle branch.
26 |     _C.MODEL.TRIDENT.TEST_BRANCH_IDX = 1
27 | 
-------------------------------------------------------------------------------- /projects/TridentNet/tridentnet/trident_rpn.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | import torch
3 | 
4 | from detectron2.modeling import PROPOSAL_GENERATOR_REGISTRY
5 | from detectron2.modeling.proposal_generator.rpn import RPN
6 | from detectron2.structures import ImageList
7 | 
8 | 
9 | @PROPOSAL_GENERATOR_REGISTRY.register()
10 | class TridentRPN(RPN):
11 |     """
12 |     Trident RPN subnetwork.
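
    The RPN logic itself is unchanged: this subclass only duplicates the images
    and ground-truth instances `num_branch` times in `forward`, so the shared RPN
    runs on the features produced by every trident branch (or on a single branch
    at test time, when TEST_BRANCH_IDX is not -1).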
13 | """ 14 | 15 | def __init__(self, cfg, input_shape): 16 | super(TridentRPN, self).__init__(cfg, input_shape) 17 | 18 | self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH 19 | self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1 20 | 21 | def forward(self, images, features, gt_instances=None): 22 | """ 23 | See :class:`RPN.forward`. 24 | """ 25 | num_branch = self.num_branch if self.training or not self.trident_fast else 1 26 | # Duplicate images and gt_instances for all branches in TridentNet. 27 | all_images = ImageList( 28 | torch.cat([images.tensor] * num_branch), images.image_sizes * num_branch 29 | ) 30 | all_gt_instances = gt_instances * num_branch if gt_instances is not None else None 31 | 32 | return super(TridentRPN, self).forward(all_images, features, all_gt_instances) 33 | -------------------------------------------------------------------------------- /scripts/demo.sh: -------------------------------------------------------------------------------- 1 | python demo/demo.py --config-file configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml \ 2 | --input input.jpg --output ./viscocodect/input_vis.png\ 3 | --opts MODEL.WEIGHTS detectron2://COCO-Detection/faster_rcnn_R_50_C4_3x/137849393/model_final_f97cb7.pkl 4 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | output_dir="./outputs/flickr30kRegRel" 3 | DATE=`date "+%m-%d-%h"` 4 | 5 | exp_name="07-26-Jul-GroundR-Visual(Rel_T50-NoST-P7-s5, VEmbRelu, SShare, DetSkipPrior, BN, ATTFuseDet2,decShare)_Phr(Sent,UniMean,1Emb)_Reg(Warmup75,2layer,0p6,smax,0p1,GAP0p1)_DISC(smean,sent,M0.2)_rel(1p0Cls,2Stage,MP_trans)_SGD_0.001_v1_work8" 6 | 7 | ## LR_SCHEDULER_NAME "WarmupMultiStepLR", "WarmupPolyLR" 8 | ## Optimizer "SGD", "Adam" 9 | ## Phrase: "Mean", "Sum", "Last" 10 | 11 | 12 | export CUDA_VISIBLE_DEVICES=0,1,2,3 13 | #export CUDA_VISIBLE_DEVICES=3,5,6,7 14 | python tools/train_weakly_grounding.py --num-gpus 4 --eval-only --config-file configs/WeaklyGrounding-RN101-C4.yaml \ 15 | OUTPUT_DIR "$output_dir/$exp_name"\ 16 | SOLVER.OPTIMIZER 'SGD' \ 17 | SOLVER.IMS_PER_BATCH 40 \ 18 | SOLVER.BASE_LR 0.001 \ 19 | SOLVER.DISC_IMG_SENT_LOSS True \ 20 | SOLVER.LR_SCHEDULER_NAME "WarmupMultiStepLR" \ 21 | SOLVER.STEPS "(32000, 40000,)" \ 22 | SOLVER.MAX_ITER 80000 \ 23 | SOLVER.REG_START_ITER 7500 \ 24 | SOLVER.CHECKPOINT_PERIOD 2500 \ 25 | MODEL.VG.NETWORK 'RegRel'\ 26 | MODEL.VG.SEMANTIC_NOUNS_TOPK 300 \ 27 | MODEL.VG.SEM_NOUNS_LOSS_FACTOR 0.5 \ 28 | MODEL.VG.SEMANTIC_ATTR_TOPK 79 \ 29 | MODEL.VG.REL_CLS_LOSS_FACTOR 1.0 \ 30 | MODEL.VG.REG_LOSS_FACTOR 0.1 \ 31 | MODEL.VG.REG_IOU 0.6 \ 32 | MODEL.VG.REG_GAP_SCORE 0.1 \ 33 | MODEL.VG.SPATIAL_FEAT False \ 34 | MODEL.VG.PHRASE_SELECT_TYPE 'Mean' \ 35 | MODEL.VG.PRECOMP_TOPK 50 \ 36 | MODEL.VG.S2_TOPK 5 \ 37 | MODEL.VG.EMBEDDING_SOURCE 'Sent' \ 38 | MODEL.VG.LSTM_BIDIRECTION False \ 39 | MODEL.VG.USING_DET_KNOWLEDGE True \ 40 | MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 7 \ 41 | MODEL.RELATION.IS_ON True \ 42 | DATALOADER.NUM_WORKERS 8 \ 43 | DATASETS.NAME 'flickr30k'\ 44 | DATASETS.TEST "(\"flickr30k_val\", \"flickr30k_test\")" \ 45 | MODEL.WEIGHTS "$output_dir/$exp_name/checkpoints/model_0074999.pth" 46 | 47 | 48 | 49 | #export CUDA_VISIBLE_DEVICES=1,2 50 | #python tools/train_weakly_grounding.py --num-gpus 2 --eval-only --config-file configs/WeaklyGrounding-RN101-C4.yaml \ 51 | # OUTPUT_DIR "$output_dir/$exp_name" \ 52 | # SOLVER.OPTIMIZER 
'SGD' \ 53 | # SOLVER.IMS_PER_BATCH 2 \ 54 | # SOLVER.BASE_LR 0.0005 \ 55 | # SOLVER.INIT_PARA False \ 56 | # SOLVER.FIX_BACKBONE True \ 57 | # SOLVER.LR_SCHEDULER_NAME "WarmupMultiStepLR" \ 58 | # SOLVER.STEPS "(20000, 40000,)" \ 59 | # SOLVER.MAX_ITER 80000 \ 60 | # MODEL.VG.NETWORK 'Reg' \ 61 | # SOLVER.CHECKPOINT_PERIOD 2500 \ 62 | # MODEL.VG.SPATIAL_FEAT False \ 63 | # MODEL.VG.PHRASE_SELECT_TYPE 'Mean' \ 64 | # MODEL.VG.PRECOMP_TOPK 50 \ 65 | # MODEL.VG.S2_TOPK 5 \ 66 | # MODEL.VG.USING_ELMO False \ 67 | # MODEL.VG.EMBEDDING_SOURCE 'Sent' \ 68 | # MODEL.VG.LSTM_BIDIRECTION False \ 69 | # MODEL.VG.USING_DET_KNOWLEDGE True \ 70 | # MODEL.RELATION.IS_ON False \ 71 | # MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 7 \ 72 | # MODEL.RELATION.INTRA_LAN False \ 73 | # MODEL.WEIGHTS "$output_dir/$exp_name/checkpoints/model_0047499.pth" \ 74 | # DATALOADER.NUM_WORKERS 4 \ 75 | # DATASETS.NAME 'flickr30k' \ 76 | # DATALOADER.ASPECT_RATIO_GROUPING True \ 77 | # TEST.EVAL_PERIOD 2500 78 | -------------------------------------------------------------------------------- /scripts/test_kac.sh: -------------------------------------------------------------------------------- 1 | output_dir="./outputs/flickr30k_kac" 2 | DATE=`date "+%m-%d-%h"` 3 | 4 | ## Disc(smean, BN, oneEmb, inner, weighted, margin0.2) 5 | ## LR_SCHEDULER_NAME "WarmupMultiStepLR", "WarmupPolyLR" 6 | ## Optimizer "SGD", "Adam" 7 | ## Reg(5000,1layer,0p6,s10,0p1,gap0p3) 8 | ## Phrase: "Mean", "Sum", "Last" 9 | 10 | ## 11 | 12 | exp_name="06-17-Jun-GroundR-Visual(50-P7,DetSkipPrior,attvis,BN,spnorm)_Phr(Sent,UniMean)_visconst(w10,sum)_loss(m0p5)_SGD_0.0005_v1" 13 | 14 | 15 | export CUDA_VISIBLE_DEVICES=0 16 | #export CUDA_VISIBLE_DEVICES=3,5,6,7 17 | python tools/train_kac.py --num-gpus 1 --eval-only --config-file configs/WeaklyGrounding-RN101-C4.yaml \ 18 | OUTPUT_DIR "$output_dir/$exp_name"\ 19 | SOLVER.OPTIMIZER 'SGD' \ 20 | SOLVER.IMS_PER_BATCH 1 \ 21 | SOLVER.DISC_IMG_SENT_LOSS False \ 22 | SOLVER.BASE_LR 0.0005 \ 23 | SOLVER.LR_SCHEDULER_NAME "WarmupMultiStepLR" \ 24 | SOLVER.STEPS "(32000, 40000,)" \ 25 | SOLVER.MAX_ITER 80000 \ 26 | SOLVER.CHECKPOINT_PERIOD 2500 \ 27 | MODEL.VG.SPATIAL_FEAT False \ 28 | MODEL.VG.NETWORK 'Kac' \ 29 | MODEL.VG.PHRASE_SELECT_TYPE 'Mean' \ 30 | MODEL.VG.PRECOMP_TOPK 50 \ 31 | MODEL.VG.EMBEDDING_SOURCE 'Sent' \ 32 | MODEL.VG.LSTM_BIDIRECTION False \ 33 | MODEL.VG.USING_DET_KNOWLEDGE True \ 34 | MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 7 \ 35 | DATALOADER.NUM_WORKERS 8 \ 36 | TEST.EVAL_PERIOD 2500 \ 37 | DATASETS.TEST "(\"flickr30k_val\", \"flickr30k_test\")" \ 38 | MODEL.WEIGHTS "$output_dir/$exp_name/checkpoints/model_0054999.pth" -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | output_dir="./outputs/flickr30k" 3 | DATE=`date "+%m-%d-%h"` 4 | 5 | ## Disc(smean, BN, oneEmb, inner, weighted, margin0.2) 6 | ## LR_SCHEDULER_NAME "WarmupMultiStepLR", "WarmupPolyLR" 7 | ## Optimizer "SGD", "Adam" 8 | ## Reg(5000,1layer,0p6,s10,0p1,gap0p3) 9 | ## Phrase: "Mean", "Sum", "Last" 10 | #$DATE-GroundR-Visual(T50-K5-NoST-P7, VEmbRelu, SShare, DetSkipPrior, BN, ATTFuseDet2, decShare)_Phr(Sent,UniMean,1Emb)_Reg(True75,2layerLeakly,0p6,smax,0p1,GAP0p3)_DISC(smean,sent,M0.2)_NoREL(bo)_SGD_0.001_v1 11 | ## NETWORK 'ML_Reg', 'Reg', 'PixelBox', 'Kac', 'Baseline', 'Baseline_s2' 12 | ## --gpu-check 13 | ## rel(0p1Cls,1Stage) 14 | ## rel(0p1Cls,2Stage,MP_trans) 15 | ## 
Reg(Warmup8w,2layer,0p6,smax,0p0,GAP0p1,lossOffNoApply) 16 | ## VEmbRelu, SShare, DetSkipPrior, BN, ATTFuseDet2,decShare 17 | 18 | export CUDA_VISIBLE_DEVICES=0,1,2,3 19 | #export CUDA_VISIBLE_DEVICES=3,5,6,7 20 | python tools/train_weakly_grounding.py --num-gpus 4 --dist-url auto --config-file configs/WeaklyGrounding-RN101-C4.yaml \ 21 | OUTPUT_DIR "$output_dir/$DATE-Visual(T50-NoST-P7-s5)_Phr(Sent,UniMean,1Emb)_DISC(smean,sent,M0.2)_Reg(Warmup75,2layer,0p6,smax,0p1,GAP0p1)_rel(1p0Cls,2Stage,MP_trans)_SGD_0.001"\ 22 | SOLVER.OPTIMIZER 'SGD' \ 23 | SOLVER.IMS_PER_BATCH 40 \ 24 | SOLVER.BASE_LR 0.001 \ 25 | SOLVER.DISC_IMG_SENT_LOSS True \ 26 | SOLVER.LR_SCHEDULER_NAME "WarmupMultiStepLR" \ 27 | SOLVER.STEPS "(32000, 40000,)" \ 28 | SOLVER.MAX_ITER 80000 \ 29 | SOLVER.REG_START_ITER 7500 \ 30 | SOLVER.CHECKPOINT_PERIOD 2500 \ 31 | MODEL.VG.NETWORK 'RegRel'\ 32 | MODEL.VG.REL_CLS_LOSS_FACTOR 1.0 \ 33 | MODEL.VG.REG_LOSS_FACTOR 0.1 \ 34 | MODEL.VG.REG_IOU 0.6 \ 35 | MODEL.VG.REG_GAP_SCORE 0.1 \ 36 | MODEL.VG.SPATIAL_FEAT False \ 37 | MODEL.VG.PHRASE_SELECT_TYPE 'Mean' \ 38 | MODEL.VG.PRECOMP_TOPK 50 \ 39 | MODEL.VG.S2_TOPK 5 \ 40 | MODEL.VG.EMBEDDING_SOURCE 'Sent' \ 41 | MODEL.VG.LSTM_BIDIRECTION False \ 42 | MODEL.VG.USING_DET_KNOWLEDGE True \ 43 | MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 7 \ 44 | MODEL.RELATION.IS_ON True \ 45 | DATALOADER.NUM_WORKERS 8 \ 46 | DATASETS.NAME 'flickr30k'\ 47 | TEST.EVAL_PERIOD 2500 48 | -------------------------------------------------------------------------------- /scripts/train_baseline.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | output_dir="./outputs/flickr30k_kac" 3 | DATE=`date "+%m-%d-%h"` 4 | 5 | ## Disc(smean, BN, oneEmb, inner, weighted, margin0.2) 6 | ## LR_SCHEDULER_NAME "WarmupMultiStepLR", "WarmupPolyLR" 7 | ## Optimizer "SGD", "Adam" 8 | ## Reg(5000,1layer,0p6,s10,0p1,gap0p3) 9 | ## Phrase: "Mean", "Sum", "Last" 10 | export CUDA_VISIBLE_DEVICES=0,1,2,3 11 | #export CUDA_VISIBLE_DEVICES=3,5,6,7 12 | python tools/train_kac.py --num-gpus 4 --dist-url auto --config-file configs/WeaklyGrounding-RN101-C4.yaml \ 13 | OUTPUT_DIR "$output_dir/$DATE-bs_GroundR-Visual(50-P7,DetSkipPrior,attvis,BN)_Phr(Sent,UniMean)_DISC(None)_SGD_0.0005_v2"\ 14 | SOLVER.OPTIMIZER 'SGD' \ 15 | SOLVER.IMS_PER_BATCH 40 \ 16 | SOLVER.BASE_LR 0.0005 \ 17 | SOLVER.DISC_IMG_SENT_LOSS False \ 18 | SOLVER.LR_SCHEDULER_NAME "WarmupMultiStepLR" \ 19 | SOLVER.STEPS "(32000, 40000,)" \ 20 | SOLVER.MAX_ITER 80000 \ 21 | SOLVER.CHECKPOINT_PERIOD 2500 \ 22 | MODEL.VG.REG_LOSS_FACTOR 1.0 \ 23 | MODEL.VG.SPATIAL_FEAT False \ 24 | MODEL.VG.NETWORK 'Baseline' \ 25 | MODEL.VG.PHRASE_SELECT_TYPE 'Mean' \ 26 | MODEL.VG.PRECOMP_TOPK 50 \ 27 | MODEL.VG.EMBEDDING_SOURCE 'Sent' \ 28 | MODEL.VG.LSTM_BIDIRECTION False \ 29 | MODEL.VG.USING_DET_KNOWLEDGE True \ 30 | MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 7 \ 31 | DATALOADER.NUM_WORKERS 8 \ 32 | TEST.EVAL_PERIOD 2500 -------------------------------------------------------------------------------- /scripts/train_kac.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | output_dir="./outputs/flickr30k_kac" 3 | DATE=`date "+%m-%d-%h"` 4 | 5 | ## Disc(smean, BN, oneEmb, inner, weighted, margin0.2) 6 | ## LR_SCHEDULER_NAME "WarmupMultiStepLR", "WarmupPolyLR" 7 | ## Optimizer "SGD", "Adam" 8 | ## Reg(5000,1layer,0p6,s10,0p1,gap0p3) 9 | ## Phrase: "Mean", "Sum", "Last" 10 | export CUDA_VISIBLE_DEVICES=0,1,2,3 11 | #export 
CUDA_VISIBLE_DEVICES=3,5,6,7 12 | python tools/train_kac.py --num-gpus 4 --dist-url auto --config-file configs/WeaklyGrounding-RN101-C4.yaml \ 13 | OUTPUT_DIR "$output_dir/$DATE-GroundR-Visual(50-P7,DetSkipPrior,attvis,BN,NST)_Phr(Phr,UniMean)_Disc(None)_SGD_0.0005_v1"\ 14 | SOLVER.OPTIMIZER 'SGD' \ 15 | SOLVER.IMS_PER_BATCH 40 \ 16 | SOLVER.DISC_IMG_SENT_LOSS False \ 17 | SOLVER.BASE_LR 0.0005 \ 18 | SOLVER.LR_SCHEDULER_NAME "WarmupMultiStepLR" \ 19 | SOLVER.STEPS "(32000, 40000,)" \ 20 | SOLVER.MAX_ITER 80000 \ 21 | SOLVER.CHECKPOINT_PERIOD 2500 \ 22 | MODEL.VG.SPATIAL_FEAT False \ 23 | MODEL.VG.NETWORK 'Baseline' \ 24 | MODEL.VG.PHRASE_SELECT_TYPE 'Mean' \ 25 | MODEL.VG.PRECOMP_TOPK 50 \ 26 | MODEL.VG.EMBEDDING_SOURCE 'Sent' \ 27 | MODEL.VG.LSTM_BIDIRECTION False \ 28 | MODEL.VG.USING_DET_KNOWLEDGE True \ 29 | MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 7 \ 30 | DATALOADER.NUM_WORKERS 8 \ 31 | TEST.EVAL_PERIOD 2500 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length=100 3 | multi_line_output=4 4 | known_standard_library=numpy,setuptools 5 | known_myself=detectron2 6 | known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,torchvision 7 | no_lines_before=STDLIB,THIRDPARTY 8 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER 9 | default_section=FIRSTPARTY 10 | 11 | [mypy] 12 | python_version=3.6 13 | ignore_missing_imports = True 14 | warn_unused_configs = True 15 | disallow_untyped_defs = True 16 | check_untyped_defs = True 17 | warn_unused_ignores = True 18 | warn_redundant_casts = True 19 | show_column_numbers = True 20 | follow_imports = silent 21 | allow_redefinition = True 22 | ; Require all functions to be annotated 23 | disallow_incomplete_defs = True 24 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | import glob 5 | import os 6 | from setuptools import find_packages, setup 7 | import torch 8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 9 | 10 | 11 | def get_extensions(): 12 | this_dir = os.path.dirname(os.path.abspath(__file__)) 13 | extensions_dir = os.path.join(this_dir, "detectron2", "layers", "csrc") 14 | 15 | main_source = os.path.join(extensions_dir, "vision.cpp") 16 | sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) 17 | source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) 18 | 19 | sources = [main_source] + sources 20 | 21 | extension = CppExtension 22 | 23 | extra_compile_args = {"cxx": []} 24 | define_macros = [] 25 | 26 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 27 | extension = CUDAExtension 28 | sources += source_cuda 29 | define_macros += [("WITH_CUDA", None)] 30 | extra_compile_args["nvcc"] = [ 31 | "-DCUDA_HAS_FP16=1", 32 | "-D__CUDA_NO_HALF_OPERATORS__", 33 | "-D__CUDA_NO_HALF_CONVERSIONS__", 34 | "-D__CUDA_NO_HALF2_OPERATORS__", 35 | ] 36 | 37 | # It's better if pytorch can do this by default .. 
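        # If a host compiler is pinned via the CC environment variable, pass it to
        # nvcc through its -ccbin flag, so the CUDA sources are compiled with the
        # same host compiler as the C++ sources.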
38 | CC = os.environ.get("CC", None) 39 | if CC is not None: 40 | extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "detectron2._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="detectron2", 61 | version="0.1", 62 | author="FAIR", 63 | url="https://github.com/facebookresearch/detectron2", 64 | description="Detectron2 is FAIR's next-generation research " 65 | "platform for object detection and segmentation.", 66 | packages=find_packages(exclude=("configs", "tests")), 67 | install_requires=[ 68 | "termcolor>=1.1", 69 | "Pillow", 70 | "yacs>=0.1.6", 71 | "tabulate", 72 | "cloudpickle", 73 | "matplotlib", 74 | "tqdm>4.29.0", 75 | "shapely", 76 | "tensorboard", 77 | ], 78 | ext_modules=get_extensions(), 79 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 80 | ) 81 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | -------------------------------------------------------------------------------- /tests/test_box2box_transform.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | import unittest 4 | import torch 5 | 6 | from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def random_boxes(mean_box, stdev, N): 12 | return torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float) 13 | 14 | 15 | class TestBox2BoxTransform(unittest.TestCase): 16 | def test_reconstruction(self): 17 | weights = (5, 5, 10, 10) 18 | b2b_tfm = Box2BoxTransform(weights=weights) 19 | src_boxes = random_boxes([10, 10, 20, 20], 1, 10) 20 | dst_boxes = random_boxes([10, 10, 20, 20], 1, 10) 21 | 22 | devices = [torch.device("cpu")] 23 | if torch.cuda.is_available(): 24 | devices.append(torch.device("cuda")) 25 | for device in devices: 26 | src_boxes = src_boxes.to(device=device) 27 | dst_boxes = dst_boxes.to(device=device) 28 | deltas = b2b_tfm.get_deltas(src_boxes, dst_boxes) 29 | dst_boxes_reconstructed = b2b_tfm.apply_deltas(deltas, src_boxes) 30 | assert torch.allclose(dst_boxes, dst_boxes_reconstructed) 31 | 32 | 33 | def random_rotated_boxes(mean_box, std_length, std_angle, N): 34 | return torch.cat( 35 | [torch.rand(N, 4) * std_length, torch.rand(N, 1) * std_angle], dim=1 36 | ) + torch.tensor(mean_box, dtype=torch.float) 37 | 38 | 39 | class TestBox2BoxTransformRotated(unittest.TestCase): 40 | def test_reconstruction(self): 41 | weights = (5, 5, 10, 10, 1) 42 | b2b_transform = Box2BoxTransformRotated(weights=weights) 43 | src_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) 44 | dst_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) 45 | 46 | devices = [torch.device("cpu")] 47 | if torch.cuda.is_available(): 48 | devices.append(torch.device("cuda")) 49 | for device in devices: 50 | src_boxes = src_boxes.to(device=device) 51 | dst_boxes = dst_boxes.to(device=device) 52 | deltas = b2b_transform.get_deltas(src_boxes, 
dst_boxes) 53 | dst_boxes_reconstructed = b2b_transform.apply_deltas(deltas, src_boxes) 54 | assert torch.allclose(dst_boxes, dst_boxes_reconstructed, atol=1e-5) 55 | 56 | 57 | if __name__ == "__main__": 58 | unittest.main() 59 | -------------------------------------------------------------------------------- /tests/test_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import numpy as np 3 | import unittest 4 | import torch 5 | 6 | from detectron2.structures import Boxes, BoxMode, pairwise_iou 7 | 8 | 9 | class TestBoxMode(unittest.TestCase): 10 | def _convert_xy_to_wh(self, x): 11 | return BoxMode.convert(x, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) 12 | 13 | def test_box_convert_list(self): 14 | for tp in [list, tuple]: 15 | box = tp([5, 5, 10, 10]) 16 | output = self._convert_xy_to_wh(box) 17 | self.assertTrue(output == tp([5, 5, 5, 5])) 18 | 19 | with self.assertRaises(Exception): 20 | self._convert_xy_to_wh([box]) 21 | 22 | def test_box_convert_array(self): 23 | box = np.asarray([[5, 5, 10, 10], [1, 1, 2, 3]]) 24 | output = self._convert_xy_to_wh(box) 25 | self.assertTrue((output[0] == [5, 5, 5, 5]).all()) 26 | self.assertTrue((output[1] == [1, 1, 1, 2]).all()) 27 | 28 | def test_box_convert_tensor(self): 29 | box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]) 30 | output = self._convert_xy_to_wh(box).numpy() 31 | self.assertTrue((output[0] == [5, 5, 5, 5]).all()) 32 | self.assertTrue((output[1] == [1, 1, 1, 2]).all()) 33 | 34 | 35 | class TestBoxIOU(unittest.TestCase): 36 | def test_pairwise_iou(self): 37 | boxes1 = torch.tensor([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]) 38 | 39 | boxes2 = torch.tensor( 40 | [ 41 | [0.0, 0.0, 1.0, 1.0], 42 | [0.0, 0.0, 0.5, 1.0], 43 | [0.0, 0.0, 1.0, 0.5], 44 | [0.0, 0.0, 0.5, 0.5], 45 | [0.5, 0.5, 1.0, 1.0], 46 | [0.5, 0.5, 1.5, 1.5], 47 | ] 48 | ) 49 | 50 | expected_ious = torch.tensor( 51 | [ 52 | [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], 53 | [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], 54 | ] 55 | ) 56 | 57 | ious = pairwise_iou(Boxes(boxes1), Boxes(boxes2)) 58 | 59 | assert torch.allclose(ious, expected_ious) 60 | 61 | 62 | if __name__ == "__main__": 63 | unittest.main() 64 | -------------------------------------------------------------------------------- /tests/test_checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import unittest 3 | from collections import OrderedDict 4 | import torch 5 | from torch import nn 6 | 7 | from detectron2.checkpoint.c2_model_loading import align_and_update_state_dicts 8 | from detectron2.utils.logger import setup_logger 9 | 10 | 11 | class TestCheckpointer(unittest.TestCase): 12 | def setUp(self): 13 | setup_logger() 14 | 15 | def create_complex_model(self): 16 | m = nn.Module() 17 | m.block1 = nn.Module() 18 | m.block1.layer1 = nn.Linear(2, 3) 19 | m.layer2 = nn.Linear(3, 2) 20 | m.res = nn.Module() 21 | m.res.layer2 = nn.Linear(3, 2) 22 | 23 | state_dict = OrderedDict() 24 | state_dict["layer1.weight"] = torch.rand(3, 2) 25 | state_dict["layer1.bias"] = torch.rand(3) 26 | state_dict["layer2.weight"] = torch.rand(2, 3) 27 | state_dict["layer2.bias"] = torch.rand(2) 28 | state_dict["res.layer2.weight"] = torch.rand(2, 3) 29 | state_dict["res.layer2.bias"] = torch.rand(2) 30 | return m, state_dict 31 | 32 | def test_complex_model_loaded(self): 33 | for add_data_parallel in [False, True]: 34 | model, state_dict = self.create_complex_model() 35 | if add_data_parallel: 36 | model = nn.DataParallel(model) 37 | model_sd = model.state_dict() 38 | 39 | align_and_update_state_dicts(model_sd, state_dict) 40 | for loaded, stored in zip(model_sd.values(), state_dict.values()): 41 | # different tensor references 42 | self.assertFalse(id(loaded) == id(stored)) 43 | # same content 44 | self.assertTrue(loaded.equal(stored)) 45 | 46 | 47 | if __name__ == "__main__": 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | 5 | import os 6 | import tempfile 7 | import unittest 8 | 9 | from detectron2.config import downgrade_config, get_cfg, upgrade_config 10 | 11 | _V0_CFG = """ 12 | MODEL: 13 | RPN_HEAD: 14 | NAME: "TEST" 15 | VERSION: 0 16 | """ 17 | 18 | _V1_CFG = """ 19 | MODEL: 20 | WEIGHT: "/path/to/weight" 21 | """ 22 | 23 | 24 | class TestConfigVersioning(unittest.TestCase): 25 | def test_upgrade_downgrade_consistency(self): 26 | cfg = get_cfg() 27 | # check that custom is preserved 28 | cfg.USER_CUSTOM = 1 29 | 30 | down = downgrade_config(cfg, to_version=0) 31 | up = upgrade_config(down) 32 | self.assertTrue(up == cfg) 33 | 34 | def _merge_cfg_str(self, cfg, merge_str): 35 | f = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) 36 | try: 37 | f.write(merge_str) 38 | f.close() 39 | cfg.merge_from_file(f.name) 40 | finally: 41 | os.remove(f.name) 42 | return cfg 43 | 44 | def test_auto_upgrade(self): 45 | cfg = get_cfg() 46 | latest_ver = cfg.VERSION 47 | cfg.USER_CUSTOM = 1 48 | 49 | self._merge_cfg_str(cfg, _V0_CFG) 50 | 51 | self.assertEqual(cfg.MODEL.RPN.HEAD_NAME, "TEST") 52 | self.assertEqual(cfg.VERSION, latest_ver) 53 | 54 | def test_guess_v1(self): 55 | cfg = get_cfg() 56 | latest_ver = cfg.VERSION 57 | self._merge_cfg_str(cfg, _V1_CFG) 58 | self.assertEqual(cfg.VERSION, latest_ver) 59 | -------------------------------------------------------------------------------- /tests/test_data_transform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import logging 5 | import numpy as np 6 | import unittest 7 | 8 | from detectron2.config import get_cfg 9 | from detectron2.data import detection_utils 10 | from detectron2.data import transforms as T 11 | from detectron2.utils.logger import setup_logger 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | class TestTransforms(unittest.TestCase): 17 | def setUp(self): 18 | setup_logger() 19 | 20 | def test_crop_polygons(self): 21 | # Ensure that shapely produce an extra vertex at the end 22 | import shapely.geometry as geometry 23 | 24 | polygon = np.asarray([3, 3.5, 11, 10.0, 38, 98, 15.0, 100.0]).reshape(-1, 2) 25 | g = geometry.Polygon(polygon) 26 | coords = np.asarray(g.exterior.coords) 27 | self.assertEqual(coords[0].tolist(), coords[-1].tolist()) 28 | 29 | def test_apply_rotated_boxes(self): 30 | np.random.seed(125) 31 | cfg = get_cfg() 32 | is_train = True 33 | transform_gen = detection_utils.build_transform_gen(cfg, is_train) 34 | image = np.random.rand(200, 300) 35 | image, transforms = T.apply_transform_gens(transform_gen, image) 36 | image_shape = image.shape[:2] # h, w 37 | assert image_shape == (800, 1200) 38 | annotation = {"bbox": [179, 97, 62, 40, -56]} 39 | 40 | boxes = np.array([annotation["bbox"]], dtype=np.float64) # boxes.shape = (1, 5) 41 | transformed_bbox = transforms.apply_rotated_box(boxes)[0] 42 | 43 | expected_bbox = np.array([484, 388, 248, 160, 56], dtype=np.float64) 44 | err_msg = "transformed_bbox = {}, expected {}".format(transformed_bbox, expected_bbox) 45 | assert np.allclose(transformed_bbox, expected_bbox), err_msg 46 | 47 | def test_apply_rotated_boxes_unequal_scaling_factor(self): 48 | np.random.seed(125) 49 | h, w = 400, 200 50 | newh, neww = 800, 800 51 | image = np.random.rand(h, w) 52 | transform_gen = [] 53 | transform_gen.append(T.Resize(shape=(newh, neww))) 54 | image, transforms = T.apply_transform_gens(transform_gen, image) 55 | image_shape = image.shape[:2] # h, w 56 | assert image_shape == (newh, neww) 57 | 58 | boxes = np.array( 59 | [ 60 | [150, 100, 40, 20, 0], 61 | [150, 100, 40, 20, 30], 62 | [150, 100, 40, 20, 90], 63 | [150, 100, 40, 20, -90], 64 | ], 65 | dtype=np.float64, 66 | ) 67 | transformed_boxes = transforms.apply_rotated_box(boxes) 68 | 69 | expected_bboxes = np.array( 70 | [ 71 | [600, 200, 160, 40, 0], 72 | [600, 200, 144.22205102, 52.91502622, 49.10660535], 73 | [600, 200, 80, 80, 90], 74 | [600, 200, 80, 80, -90], 75 | ], 76 | dtype=np.float64, 77 | ) 78 | err_msg = "transformed_boxes = {}, expected {}".format(transformed_boxes, expected_bboxes) 79 | assert np.allclose(transformed_boxes, expected_bboxes), err_msg 80 | -------------------------------------------------------------------------------- /tests/test_roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import numpy as np 3 | import unittest 4 | import cv2 5 | import torch 6 | 7 | from detectron2.layers.roi_align import ROIAlign 8 | 9 | 10 | class ROIAlignTest(unittest.TestCase): 11 | def test_forward_output(self): 12 | input = np.arange(25).reshape(5, 5).astype("float32") 13 | """ 14 | 0 1 2 3 4 15 | 5 6 7 8 9 16 | 10 11 12 13 14 17 | 15 16 17 18 19 18 | 20 21 22 23 24 19 | """ 20 | 21 | output = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=False) 22 | output_correct = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=True) 23 | 24 | # without correction: 25 | old_results = [ 26 | [7.5, 8, 8.5, 9], 27 | [10, 10.5, 11, 11.5], 28 | [12.5, 13, 13.5, 14], 29 | [15, 15.5, 16, 16.5], 30 | ] 31 | 32 | # with 0.5 correction: 33 | correct_results = [ 34 | [4.5, 5.0, 5.5, 6.0], 35 | [7.0, 7.5, 8.0, 8.5], 36 | [9.5, 10.0, 10.5, 11.0], 37 | [12.0, 12.5, 13.0, 13.5], 38 | ] 39 | # This is an upsampled version of [[6, 7], [11, 12]] 40 | 41 | self.assertTrue(np.allclose(output.flatten(), np.asarray(old_results).flatten())) 42 | self.assertTrue( 43 | np.allclose(output_correct.flatten(), np.asarray(correct_results).flatten()) 44 | ) 45 | 46 | # Also see similar issues in tensorflow at 47 | # https://github.com/tensorflow/tensorflow/issues/26278 48 | 49 | def test_resize(self): 50 | H, W = 30, 30 51 | input = np.random.rand(H, W).astype("float32") * 100 52 | box = [10, 10, 20, 20] 53 | output = self._simple_roialign(input, box, (5, 5), aligned=True) 54 | 55 | input2x = cv2.resize(input, (W // 2, H // 2), interpolation=cv2.INTER_LINEAR) 56 | box2x = [x / 2 for x in box] 57 | output2x = self._simple_roialign(input2x, box2x, (5, 5), aligned=True) 58 | diff = np.abs(output2x - output) 59 | self.assertTrue(diff.max() < 1e-4) 60 | 61 | def _simple_roialign(self, img, box, resolution, aligned=True): 62 | """ 63 | RoiAlign with scale 1.0 and 0 sample ratio. 64 | """ 65 | if isinstance(resolution, int): 66 | resolution = (resolution, resolution) 67 | op = ROIAlign(resolution, 1.0, 0, aligned=aligned) 68 | input = torch.from_numpy(img[None, None, :, :].astype("float32")) 69 | 70 | rois = [0] + list(box) 71 | rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32")) 72 | output = op.forward(input, rois).numpy() 73 | if torch.cuda.is_available(): 74 | output_cuda = op.forward(input.cuda(), rois.cuda()).cpu().numpy() 75 | self.assertTrue(np.allclose(output, output_cuda)) 76 | return output[0, 0] 77 | 78 | def test_empty_box(self): 79 | img = np.random.rand(5, 5) 80 | box = [3, 4, 5, 4] 81 | o = self._simple_roialign(img, box, 7) 82 | self.assertTrue((o == 0).all()) 83 | 84 | 85 | if __name__ == "__main__": 86 | unittest.main() 87 | -------------------------------------------------------------------------------- /tests/test_roi_pooler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
--------------------------------------------------------------------------------
/tests/test_roi_pooler.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import unittest
import torch

from detectron2.modeling.poolers import ROIPooler
from detectron2.structures import Boxes, RotatedBoxes

logger = logging.getLogger(__name__)


class TestROIPooler(unittest.TestCase):
    def _rand_boxes(self, num_boxes, x_max, y_max):
        coords = torch.rand(num_boxes, 4)
        coords[:, 0] *= x_max
        coords[:, 1] *= y_max
        coords[:, 2] *= x_max
        coords[:, 3] *= y_max
        boxes = torch.zeros(num_boxes, 4)
        # sort the corners so that (x0, y0) <= (x1, y1)
        boxes[:, 0] = torch.min(coords[:, 0], coords[:, 2])
        boxes[:, 1] = torch.min(coords[:, 1], coords[:, 3])
        boxes[:, 2] = torch.max(coords[:, 0], coords[:, 2])
        boxes[:, 3] = torch.max(coords[:, 1], coords[:, 3])
        return boxes

    def _test_roialignv2_roialignrotated_match(self, device):
        pooler_resolution = 14
        canonical_level = 4
        canonical_scale_factor = 2 ** canonical_level
        pooler_scales = (1.0 / canonical_scale_factor,)
        sampling_ratio = 0

        N, C, H, W = 2, 4, 10, 8
        N_rois = 10
        std = 11
        mean = 0
        feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean

        features = [feature.to(device)]

        rois = []
        rois_rotated = []
        for _ in range(N):
            boxes = self._rand_boxes(
                num_boxes=N_rois, x_max=W * canonical_scale_factor, y_max=H * canonical_scale_factor
            )

            # the equivalent (cx, cy, w, h, angle=0) representation of each box
            rotated_boxes = torch.zeros(N_rois, 5)
            rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
            rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
            rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
            rois.append(Boxes(boxes).to(device))
            rois_rotated.append(RotatedBoxes(rotated_boxes).to(device))

        roialignv2_pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type="ROIAlignV2",
        )

        roialignv2_out = roialignv2_pooler(features, rois)

        roialignrotated_pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type="ROIAlignRotated",
        )

        roialignrotated_out = roialignrotated_pooler(features, rois_rotated)

        assert torch.allclose(roialignv2_out, roialignrotated_out, atol=1e-4)

    def test_roialignv2_roialignrotated_match_cpu(self):
        self._test_roialignv2_roialignrotated_match(device="cpu")

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_roialignv2_roialignrotated_match_cuda(self):
        self._test_roialignv2_roialignrotated_match(device="cuda")


if __name__ == "__main__":
    unittest.main()
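
def _assign_fpn_level(boxes, canonical_box_size=224, canonical_level=4):
    """Sketch, not part of the original tests: the usual heuristic a
    multi-scale ROIPooler applies to assign each box to an FPN level. A box
    whose sqrt(area) equals `canonical_box_size` maps to `canonical_level`;
    halving or doubling the box moves it one level down or up. Clamping to the
    available levels is omitted here, and the epsilon is an assumption.
    """
    area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return torch.floor(
        canonical_level + torch.log2(torch.sqrt(area) / canonical_box_size + 1e-8)
    )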
--------------------------------------------------------------------------------
/tests/test_visualizer.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# File: test_visualizer.py

import numpy as np
import unittest
import torch

from detectron2.data import MetadataCatalog
from detectron2.structures import Instances
from detectron2.utils.visualizer import Visualizer


class TestVisualizer(unittest.TestCase):
    def _random_data(self):
        H, W = 100, 100
        N = 10
        img = np.random.rand(H, W, 3) * 255
        boxxy = np.random.rand(N, 2) * (H // 2)
        boxes = np.concatenate((boxxy, boxxy + H // 2), axis=1)

        def _rand_poly():
            return np.random.rand(3, 2).flatten() * H

        polygons = [[_rand_poly() for _ in range(np.random.randint(1, 5))] for _ in range(N)]

        # np.bool is a deprecated alias for the builtin bool; use bool directly
        mask = np.zeros_like(img[:, :, 0], dtype=bool)
        mask[:10, 10:20] = 1

        labels = [str(i) for i in range(N)]
        return img, boxes, labels, polygons, [mask] * N

    @property
    def metadata(self):
        return MetadataCatalog.get("coco_2017_train")

    def test_overlay_instances(self):
        img, boxes, labels, polygons, masks = self._random_data()

        v = Visualizer(img, self.metadata)
        output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image()
        self.assertEqual(output.shape, img.shape)

        # Test 2x scaling
        v = Visualizer(img, self.metadata, scale=2.0)
        output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image()
        self.assertEqual(output.shape[0], img.shape[0] * 2)

        # Test overlay masks
        v = Visualizer(img, self.metadata)
        output = v.overlay_instances(masks=masks, boxes=boxes, labels=labels).get_image()
        self.assertEqual(output.shape, img.shape)

    def test_overlay_instances_no_boxes(self):
        img, boxes, labels, polygons, _ = self._random_data()
        v = Visualizer(img, self.metadata)
        v.overlay_instances(masks=polygons, boxes=None, labels=labels).get_image()

    def test_draw_instance_predictions(self):
        img, boxes, _, _, masks = self._random_data()
        num_inst = len(boxes)
        inst = Instances((img.shape[0], img.shape[1]))
        inst.pred_classes = torch.randint(0, 80, size=(num_inst,))
        inst.scores = torch.rand(num_inst)
        inst.pred_boxes = torch.from_numpy(boxes)
        inst.pred_masks = torch.from_numpy(np.asarray(masks))

        v = Visualizer(img, self.metadata)
        v.draw_instance_predictions(inst)

    def test_correct_output_shape(self):
        img = np.random.rand(928, 928, 3) * 255
        v = Visualizer(img, self.metadata)
        out = v.output.get_image()
        self.assertEqual(out.shape, img.shape)
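
def _visualizer_usage_sketch(image_path="input.jpg"):
    """Minimal usage sketch, not part of the original tests: overlay a couple
    of hand-made boxes on an image read from disk. `image_path` and the output
    filename are placeholders assumed for illustration.
    """
    import cv2

    img = cv2.imread(image_path)[:, :, ::-1]  # OpenCV loads BGR; Visualizer expects RGB
    v = Visualizer(img, MetadataCatalog.get("coco_2017_train"))
    out = v.overlay_instances(
        boxes=np.array([[10, 10, 80, 80], [30, 40, 120, 150]]),
        labels=["a", "b"],
    ).get_image()
    cv2.imwrite("vis.jpg", out[:, :, ::-1])  # convert back to BGR for imwrite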
--------------------------------------------------------------------------------
/unit_test.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3.6
# -*- coding: utf-8 -*-
# @Time : 2019/11/22 19:13

import os
import os.path as osp
import pickle


def extract_boxes():
    """Collect the precomputed boxes and image scales from the per-image
    feature pickles into a single annotation file."""
    feat_dir = './flickr30k_datasets/flickr30k_feat_nms/flickr30k_torch_nms1e4_feat'
    path_list = os.listdir(feat_dir)

    precomp_annos = {}
    for img_id in path_list:
        feat_path = osp.join(feat_dir, img_id)
        with open(feat_path, 'rb') as load_f:
            res = pickle.load(load_f)

        imgs = img_id.split('.')[0]  # image id = filename stem
        bbox_data = res['boxes']
        img_scale = res['img_scale']
        precomp_annos[imgs] = {'boxes': bbox_data, 'img_scale': img_scale}
        print(img_id, 'done')

    with open('./flickr30k_datasets/flickr30k_anno/precomp_annos.pkl', 'wb') as dump_f:
        pickle.dump(precomp_annos, dump_f)


if __name__ == '__main__':
    extract_boxes()
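
# A companion sketch, not in the original script: how the dumped annotation
# file could be read back. The keys mirror what extract_boxes() writes; the
# default path repeats the hard-coded location above and is an assumption.
def load_precomp_annos(path='./flickr30k_datasets/flickr30k_anno/precomp_annos.pkl'):
    with open(path, 'rb') as load_f:
        annos = pickle.load(load_f)
    # annos maps image id (filename stem) -> {'boxes': ..., 'img_scale': ...}
    return annos
--------------------------------------------------------------------------------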