├── .gitignore
├── .idea
    ├── .gitignore
    ├── VisualGroundingR50.iml
    ├── deployment.xml
    ├── dictionaries
    │   └── liuyf.xml
    ├── emacs.xml
    ├── inspectionProfiles
    │   └── profiles_settings.xml
    ├── misc.xml
    ├── modules.xml
    ├── sshConfigs.xml
    ├── vcs.xml
    └── webServers.xml
├── INSTALL.md
├── README.md
├── caffe_extract
    └── extract.py
├── configs
    ├── caffe2
    │   ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml
    │   ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml
    │   ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml
    │   ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
    │   ├── e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml
    │   ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml
    │   ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml
    │   ├── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml
    │   └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
    ├── cityscapes
    │   ├── e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml
    │   └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
    ├── e2e_flickr_bottom_up_faster_rcnn_R_101_FPN_1x.yaml
    ├── e2e_flickr_det_faster_rcnn_R_101_FPN_1x.yaml
    ├── e2e_flickr_faster_rcnn_R_101_FPN_1x.yaml
    ├── gn_baselines
    │   ├── README.md
    │   ├── e2e_faster_rcnn_R_50_FPN_1x_gn.yaml
    │   ├── e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_1x_gn.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
    │   ├── scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml
    │   ├── scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
    │   ├── scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml
    │   └── scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
    ├── pascal_voc
    │   ├── e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml
    │   ├── e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml
    │   └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
    ├── quick_schedules
    │   ├── e2e_faster_rcnn_R_50_C4_quick.yaml
    │   ├── e2e_faster_rcnn_R_50_FPN_quick.yaml
    │   ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml
    │   ├── e2e_keypoint_rcnn_R_50_FPN_quick.yaml
    │   ├── e2e_mask_rcnn_R_50_C4_quick.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_quick.yaml
    │   ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml
    │   ├── rpn_R_50_C4_quick.yaml
    │   └── rpn_R_50_FPN_quick.yaml
    ├── retinanet
    │   ├── retinanet_R-101-FPN_1x.yaml
    │   ├── retinanet_R-101-FPN_P5_1x.yaml
    │   ├── retinanet_R-50-FPN_1x.yaml
    │   ├── retinanet_R-50-FPN_1x_quick.yaml
    │   ├── retinanet_R-50-FPN_P5_1x.yaml
    │   └── retinanet_X_101_32x8d_FPN_1x.yaml
    └── yaml_hist_MaskRCNN
    │   ├── e2e_VRD_faster_rcnn_R_101_FPN_1x.yaml
    │   ├── e2e_VRD_faster_rcnn_R_50_C4_1x.yaml
    │   ├── e2e_VRD_faster_rcnn_VGG16.yaml
    │   ├── e2e_faster_rcnn_R_101_FPN_1x.yaml
    │   ├── e2e_faster_rcnn_R_50_C4_1x.yaml
    │   ├── e2e_faster_rcnn_R_50_FPN_1x.yaml
    │   ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml
    │   ├── e2e_faster_rcnn_fbnet.yaml
    │   ├── e2e_faster_rcnn_fbnet_600.yaml
    │   ├── e2e_faster_rcnn_fbnet_chamv1a_600.yaml
    │   ├── e2e_keypoint_rcnn_R_50_FPN_1x.yaml
    │   ├── e2e_mask_rcnn_R_101_FPN_1x.yaml
    │   ├── e2e_mask_rcnn_R_50_C4_1x.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_1x.yaml
    │   ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml
    │   ├── e2e_mask_rcnn_fbnet.yaml
    │   ├── e2e_mask_rcnn_fbnet_600.yaml
    │   ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml
    │   ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml
    │   ├── rpn_R_101_FPN_1x.yaml
    │   ├── rpn_R_50_C4_1x.yaml
    │   ├── rpn_R_50_FPN_1x.yaml
    │   └── rpn_X_101_32x8d_FPN_1x.yaml
├── data_analysis
    ├── VisDetectionBbox.ipynb
    ├── bert.ipynb
    ├── data_analysis.ipynb
    ├── fast_rcnn
    │   ├── __init__.py
    │   ├── bbox_transform.py
    │   ├── bbox_transform_pytorch.py
    │   ├── config.py
    │   ├── config2.py
    │   └── nms_wrapper.py
    ├── flicker_recall_check.ipynb
    ├── flicker_recall_check.py
    ├── flickr_recall_check_v1.py
    ├── model_size.py
    ├── nms
    │   ├── .gitignore
    │   ├── __init__.py
    │   ├── cpu_nms.pyx
    │   ├── gpu_nms.hpp
    │   ├── gpu_nms.pyx
    │   ├── nms_kernel.cu
    │   └── py_cpu_nms.py
    ├── pretrain_weight
    │   ├── embed_ba_0.npy
    │   ├── embed_ba_1.npy
    │   ├── lstm1_0.npy
    │   ├── lstm1_1.npy
    │   ├── lstm1_2.npy
    │   ├── query_bbox_pred_0.npy
    │   ├── query_bbox_pred_1.npy
    │   ├── query_score_fc_0.npy
    │   ├── query_score_fc_1.npy
    │   ├── qv_fc1_0.npy
    │   └── qv_fc1_1.npy
    ├── sng_parser
    │   ├── __init__.py
    │   ├── _data
    │   │   ├── phrasal-preps.txt
    │   │   ├── phrasal-verbs.txt
    │   │   └── scene-nouns.txt
    │   ├── backends
    │   │   ├── __init__.py
    │   │   ├── backend.py
    │   │   └── spacy_parser.py
    │   ├── database.py
    │   ├── parser.py
    │   └── utils.py
    ├── tools
    │   ├── .gitignore
    │   ├── get-phrasal-verbs.sh
    │   ├── get-scene-nouns.sh
    │   ├── parse-eos.py
    │   └── process-scene-nouns.py
    ├── untitled.txt
    ├── upper_bound.ipynb
    └── utils
    │   ├── __init__.py
    │   ├── blob.py
    │   ├── boxes-Copy1.py
    │   ├── boxes.py
    │   ├── cbam.py
    │   ├── collections.py
    │   ├── colormap.py
    │   ├── cython_bbox.c
    │   ├── cython_bbox.pyx
    │   ├── cython_nms.c
    │   ├── cython_nms.pyx
    │   ├── detectron_weight_helper.py
    │   ├── env.py
    │   ├── fpn.py
    │   ├── image.py
    │   ├── io.py
    │   ├── keypoints.py
    │   ├── logging.py
    │   ├── misc.py
    │   ├── net.py
    │   ├── resnet_weights_helper.py
    │   ├── segms.py
    │   ├── subprocess.py
    │   ├── timer.py
    │   ├── training_stats.py
    │   └── vis.py
├── demo
    ├── README.md
    ├── demo_e2e_mask_rcnn_R_50_FPN_1x.png
    ├── demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png
    ├── predictor.py
    └── webcam.py
├── killpy.sh
├── maskrcnn_benchmark
    ├── __init__.py
    ├── config
    │   ├── __init__.py
    │   ├── defaults.py
    │   └── paths_catalog.py
    ├── csrc
    │   ├── ROIAlign.h
    │   ├── ROIPool.h
    │   ├── SigmoidFocalLoss.h
    │   ├── cpu
    │   │   ├── ROIAlign_cpu.cpp
    │   │   ├── nms_cpu.cpp
    │   │   └── vision.h
    │   ├── cuda
    │   │   ├── ROIAlign_cuda.cu
    │   │   ├── ROIPool_cuda.cu
    │   │   ├── SigmoidFocalLoss_cuda.cu
    │   │   ├── nms.cu
    │   │   └── vision.h
    │   ├── nms.h
    │   └── vision.cpp
    ├── data
    │   ├── README.md
    │   ├── __init__.py
    │   ├── build.py
    │   ├── collate_batch.py
    │   ├── datasets
    │   │   ├── __init__.py
    │   │   ├── coco.py
    │   │   ├── concat_dataset.py
    │   │   ├── evaluation
    │   │   │   ├── VG
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── vg_eval.py
    │   │   │   ├── __init__.py
    │   │   │   ├── coco
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── coco_eval.py
    │   │   │   ├── flickr
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── flickr_eval.py
    │   │   │   └── voc
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── voc_eval.py
    │   │   ├── flickr.py
    │   │   ├── list_dataset.py
    │   │   ├── visual_genome.py
    │   │   └── voc.py
    │   ├── samplers
    │   │   ├── __init__.py
    │   │   ├── distributed.py
    │   │   ├── grouped_batch_sampler.py
    │   │   └── iteration_based_batch_sampler.py
    │   └── transforms
    │   │   ├── __init__.py
    │   │   ├── build.py
    │   │   ├── transforms.py
    │   │   ├── transforms_vg.py
    │   │   └── transforms_vg_bp.py
    ├── engine
    │   ├── __init__.py
    │   ├── inference.py
    │   ├── trainer.py
    │   └── trainer_bak.py
    ├── layers
    │   ├── __init__.py
    │   ├── _utils.py
    │   ├── batch_norm.py
    │   ├── generate_dense_relation.py
    │   ├── generate_sample_relation.py
    │   ├── generate_union_region.py
    │   ├── misc.py
    │   ├── nms.py
    │   ├── numerical_stability_softmax.py
    │   ├── roi_align.py
    │   ├── roi_pool.py
    │   ├── sigmoid_focal_loss.py
    │   ├── smooth_l1_loss.py
    │   └── spatial_coordinate.py
    ├── modeling
    │   ├── __init__.py
    │   ├── backbone
    │   │   ├── __init__.py
    │   │   ├── backbone.py
    │   │   ├── bottom_up_resnet.py
    │   │   ├── fbnet.py
    │   │   ├── fbnet_builder.py
    │   │   ├── fbnet_modeldef.py
    │   │   ├── fpn.py
    │   │   ├── resnet.py
    │   │   └── vgg16.py
    │   ├── balanced_positive_negative_sampler.py
    │   ├── box_coder.py
    │   ├── detector
    │   │   ├── __init__.py
    │   │   ├── detectors.py
    │   │   ├── generalized_rcnn.py
    │   │   └── generalized_rcnn_det.py
    │   ├── make_layers.py
    │   ├── matcher.py
    │   ├── poolers.py
    │   ├── registry.py
    │   ├── relation
    │   │   ├── __init__.py
    │   │   ├── feature_refine.py
    │   │   ├── loss.py
    │   │   └── relation_detection.py
    │   ├── roi_heads
    │   │   ├── __init__.py
    │   │   ├── box_head
    │   │   │   ├── __init__.py
    │   │   │   ├── box_head.py
    │   │   │   ├── inference.py
    │   │   │   ├── loss.py
    │   │   │   ├── roi_box_feature_extractors.py
    │   │   │   └── roi_box_predictors.py
    │   │   ├── keypoint_head
    │   │   │   ├── __init__.py
    │   │   │   ├── inference.py
    │   │   │   ├── keypoint_head.py
    │   │   │   ├── loss.py
    │   │   │   ├── roi_keypoint_feature_extractors.py
    │   │   │   └── roi_keypoint_predictors.py
    │   │   ├── mask_head
    │   │   │   ├── __init__.py
    │   │   │   ├── inference.py
    │   │   │   ├── loss.py
    │   │   │   ├── mask_head.py
    │   │   │   ├── roi_mask_feature_extractors.py
    │   │   │   └── roi_mask_predictors.py
    │   │   └── roi_heads.py
    │   ├── rpn
    │   │   ├── __init__.py
    │   │   ├── anchor_generator.py
    │   │   ├── inference.py
    │   │   ├── loss.py
    │   │   ├── retinanet
    │   │   │   ├── __init__.py
    │   │   │   ├── inference.py
    │   │   │   ├── loss.py
    │   │   │   └── retinanet.py
    │   │   ├── rpn.py
    │   │   └── utils.py
    │   ├── utils.py
    │   └── vg
    │   │   ├── FeatureRefinement.py
    │   │   ├── VisualGraph.py
    │   │   ├── VisualGraphUpdate.py
    │   │   ├── __init__.py
    │   │   ├── loss.py
    │   │   ├── phrase_embedding.py
    │   │   ├── vg_detection.py
    │   │   └── vg_detection_2stage_sep_rel_const.py
    ├── solver
    │   ├── __init__.py
    │   ├── build.py
    │   └── lr_scheduler.py
    ├── structures
    │   ├── __init__.py
    │   ├── bounding_box.py
    │   ├── boxlist_ops.py
    │   ├── image_list.py
    │   ├── keypoint.py
    │   ├── relation_triplet.py
    │   └── segmentation_mask.py
    └── utils
    │   ├── README.md
    │   ├── __init__.py
    │   ├── c2_model_loading.py
    │   ├── checkpoint.py
    │   ├── collect_env.py
    │   ├── comm.py
    │   ├── cv2_util.py
    │   ├── debugger.py
    │   ├── direction_word_dict.py
    │   ├── env.py
    │   ├── imports.py
    │   ├── logger.py
    │   ├── metric_logger.py
    │   ├── miscellaneous.py
    │   ├── model_serialization.py
    │   ├── model_zoo.py
    │   ├── ops.py
    │   ├── registry.py
    │   └── timer.py
├── requirements.txt
├── scripts
    └── train.sh
├── setup.py
├── skipthoughts
    ├── __init__.py
    ├── dropout.py
    ├── gru.py
    ├── skipthoughts.py
    └── version.py
├── tests
    ├── checkpoint.py
    ├── env_tests
    │   └── env.py
    ├── test_backbones.py
    ├── test_box_coder.py
    ├── test_configs.py
    ├── test_data_samplers.py
    ├── test_detectors.py
    ├── test_fbnet.py
    ├── test_feature_extractors.py
    ├── test_metric_logger.py
    ├── test_nms.py
    ├── test_predictors.py
    ├── test_rpn_heads.py
    └── utils.py
└── tools
    ├── cityscapes
        ├── convert_cityscapes_to_coco.py
        └── instances2dict_with_polygons.py
    ├── test_flickr.py
    ├── test_net.py
    └── train_net.py


/.gitignore:
--------------------------------------------------------------------------------
1 | maskrcnn_benchmark/modeling/vg_bak
2 | maskrcnn_benchmark/structure_prediction


--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 | 


--------------------------------------------------------------------------------
/.idea/VisualGroundingR50.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="jdk" jdkName="Python 3.6" jdkType="Python SDK" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/.idea/deployment.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="PublishConfigData" autoUpload="Always" serverName="ResNet50" showDialogOnRemoteDnD="false" showAutoUploadSettingsWarning="false">
 4 |     <serverData>
 5 |       <paths name="AI-Docker">
 6 |         <serverdata>
 7 |           <mappings>
 8 |             <mapping deploy="/root/PycharmProjects/VisualGroundingR50" local="$PROJECT_DIR$" web="/" />
 9 |           </mappings>
10 |         </serverdata>
11 |       </paths>
12 |       <paths name="ResNet50">
13 |         <serverdata>
14 |           <mappings>
15 |             <mapping deploy="/public/sist/home/liuyf/PycharmProject/VisualGroundingResNet50" local="$PROJECT_DIR$" web="/" />
16 |           </mappings>
17 |         </serverdata>
18 |       </paths>
19 |     </serverData>
20 |     <option name="myAutoUpload" value="ALWAYS" />
21 |   </component>
22 | </project>


--------------------------------------------------------------------------------
/.idea/dictionaries/liuyf.xml:
--------------------------------------------------------------------------------
 1 | <component name="ProjectDictionaryState">
 2 |   <dictionary name="liuyf">
 3 |     <words>
 4 |       <w>anno</w>
 5 |       <w>atte</w>
 6 |       <w>atten</w>
 7 |       <w>avgpool</w>
 8 |       <w>baidu</w>
 9 |       <w>bphr</w>
10 |       <w>bvis</w>
11 |       <w>conv</w>
12 |       <w>datasets</w>
13 |       <w>downsample</w>
14 |       <w>flickr</w>
15 |       <w>inds</w>
16 |       <w>intra</w>
17 |       <w>keypoint</w>
18 |       <w>keypoints</w>
19 |       <w>logits</w>
20 |       <w>mscoco</w>
21 |       <w>msra</w>
22 |       <w>noent</w>
23 |       <w>phrsbj</w>
24 |       <w>phrtnobj</w>
25 |       <w>phrtnsbj</w>
26 |       <w>pooler</w>
27 |       <w>precomp</w>
28 |       <w>precompute</w>
29 |       <w>pretrained</w>
30 |       <w>rcnn</w>
31 |       <w>relu</w>
32 |       <w>resnet</w>
33 |       <w>segm</w>
34 |       <w>softmax</w>
35 |       <w>topk</w>
36 |       <w>xyxy</w>
37 |     </words>
38 |   </dictionary>
39 | </component>


--------------------------------------------------------------------------------
/.idea/emacs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="EmacsSettings">
4 |     <option name="emacsPath" value="/usr/bin/emacs" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 | <component name="InspectionProjectProfileManager">
2 |   <settings>
3 |     <option name="USE_PROJECT_PROFILE" value="false" />
4 |     <version value="1.0" />
5 |   </settings>
6 | </component>


--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="JavaScriptSettings">
4 |     <option name="languageLevel" value="ES6" />
5 |   </component>
6 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
7 | </project>


--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/VisualGroundingR50.iml" filepath="$PROJECT_DIR$/.idea/VisualGroundingR50.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/.idea/sshConfigs.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="SshConfigs">
 4 |     <configs>
 5 |       <sshConfig authType="PASSWORD" host="10.15.22.198" id="cdb80103-7357-4500-8fbf-2b32dfdf051e" port="22" customName="ResNet50" nameFormat="CUSTOM" username="liuyf" />
 6 |       <sshConfig authType="PASSWORD" host="10.15.89.41" id="ff6b495e-0360-4cf4-89ce-01a3ac3b5fea" port="22033" customName="AI-Docker" nameFormat="CUSTOM" username="root" />
 7 |       <sshConfig authType="PASSWORD" host="10.15.89.41" id="0e737403-6264-454c-ac7d-bec8e1d3a4cf" port="22033" customName="AI-Docker" nameFormat="CUSTOM" username="root" />
 8 |     </configs>
 9 |   </component>
10 | </project>


--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/.idea/webServers.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="WebServers">
 4 |     <option name="servers">
 5 |       <webServer id="ac36bcc4-55d4-49ac-aa0b-d86a8ed29f0d" name="ResNet50" url="http://10.15.22.198">
 6 |         <fileTransfer host="10.15.22.198" port="22" rootFolder="/sist-database/home/hexuming/liuyf" accessType="SFTP">
 7 |           <advancedOptions>
 8 |             <advancedOptions dataProtectionLevel="Private" passiveMode="true" shareSSLContext="true" />
 9 |           </advancedOptions>
10 |           <option name="port" value="22" />
11 |         </fileTransfer>
12 |       </webServer>
13 |       <webServer id="4bb54298-0afd-4eca-bf94-76c07c3a8713" name="AI-Docker" url="http://10.15.89.41">
14 |         <fileTransfer host="10.15.89.41" port="22033" accessType="SFTP">
15 |           <advancedOptions>
16 |             <advancedOptions dataProtectionLevel="Private" passiveMode="true" shareSSLContext="true" />
17 |           </advancedOptions>
18 |           <option name="port" value="22033" />
19 |         </fileTransfer>
20 |       </webServer>
21 |     </option>
22 |   </component>
23 | </project>


--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
 1 | ## Installation
 2 | 
 3 | ### Requirements:
 4 | - PyTorch 1.0 from a nightly release. It **will not** work with 1.0 nor 1.0.1. Installation instructions can be found in https://pytorch.org/get-started/locally/
 5 | - torchvision from master
 6 | - cocoapi
 7 | - yacs
 8 | - matplotlib
 9 | - GCC >= 4.9
10 | - (optional) OpenCV for the webcam demo
11 | 
12 | 
13 | ### Option 1: Step-by-step installation
14 | 
15 | ```bash
16 | 
17 | # install pycocotools
18 | cd $INSTALL_DIR
19 | git clone https://github.com/cocodataset/cocoapi.git
20 | cd cocoapi/PythonAPI
21 | python setup.py build_ext install
22 | 
23 | 
24 | # install allennlp
25 | pip install allennlp
26 | 
27 | # install PyTorch Detection
28 | cd $INSTALL_DIR
29 | git clone https://github.com/facebookresearch/maskrcnn-benchmark.git
30 | cd maskrcnn-benchmark
31 | 
32 | 
33 | # the following will install the lib with
34 | # symbolic links, so that you can modify
35 | # the files if you want and won't need to
36 | # re-build it
37 | python setup.py build develop
38 | 
39 | # note: allennlp is also required (installed above with `pip install allennlp`)
40 | 
41 | 
42 | ```


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # LCMCG.Pytorch
 2 | 
 3 | This repo is the official implementation of ["Learning Cross-Modal Context Graph for Visual Grounding"](https://arxiv.org/pdf/1911.09042.pdf) (AAAI2020)
 4 | ## Installation
 5 | Check [INSTALL.md](INSTALL.md) for installation instructions.
 6 | 
 7 | ## pre-requirements
 8 | 1. Download the Flickr30k dataset from this [link](http://bryanplummer.com/Flickr30kEntities/)
 9 | 2. Pre-computed bounding boxes are extracted by using [FasterRCNN](https://github.com/facebookresearch/maskrcnn-benchmark) \
10 | We use the config "e2e_faster_rcnn_R_50_C4_1x.yaml" to train the object detector on MSCOCO dataset and extract the feature map at C4 layer.
11 | 3. Language graphs are extracted with [SceneGraphParser](https://github.com/vacancy/SceneGraphParser). The resulting sg_anno.json has been uploaded to Google Drive and can be downloaded from there.
12 | 4. Some pre-processing data, like sentence annotations, box annotations.
13 | 5. You need to create the './flickr_datasets' folder and put all annotations in it. I highly recommend that you work out all 
14 | the data paths used in this project. You can refer to these two files for details: "maskrcnn_benchmark/config/paths_catalog.py" and "maskrcnn_benchmark/data/datasets/flickr.py".
15 | 
16 | The pretrained object detector weights and annotations can be found here at baidu-disk (link:https://pan.baidu.com/s/1bYbGUsHcZJQHele87MzcMg  password:5ie6) or [google drive](https://drive.google.com/drive/folders/1dRp61muWDNuFG-V9KMKcZ26zR8f2ujci?usp=sharing)
17 | 
18 | 
19 | ## training
20 | 
21 | 1. You can train our model by running the training script:
22 | ```bash
23 | sh scripts/train.sh
24 | ```
25 | 
26 | ""
27 | 
28 | ## citation
29 | If you are interested in our paper, please cite it.
30 | ```bibtex
31 | @inproceedings{liu2019learning,
32 |   title={Learning Cross-modal Context Graph for Visual Grounding},
33 |   author={Liu, Yongfei and Wan, Bo and Zhu, Xiaodan and He, Xuming},
34 |   booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
35 |   year={2020}
36 | }
37 | ```
38 | 


--------------------------------------------------------------------------------
/caffe_extract/extract.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3.6
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2019-06-03 16:46
 4 | # @Author  : Yongfei Liu
 5 | # @Email   : liuyf3@shanghaitech.edu.cn
 6 | 
 7 | import caffe
 8 | import numpy as np
 9 | import argparse
10 | import os
11 | 
def extract_caffe_model(model, weights, output_path):
  """Dump every parameter blob of a Caffe model to numpy files.

  The network is built from the prototxt in TEST phase and the weights are
  copied into it. Each parameter blob is then saved with ``np.save`` under
  ``<output_path>/<layer name>_<blob index>``, where the blob index starts
  at 0 for every layer (``np.save`` appends ``.npy`` when the name lacks it).

  Args:
    model: path of '.prototxt'
    weights: path of '.caffemodel'
    output_path: output directory of the numpy params; created if missing
  Returns:
    None
  """
  net = caffe.Net(model, caffe.TEST)
  net.copy_from(weights)

  # exist_ok=True replaces the separate exists() check, which could race
  # with another process creating the directory in between.
  os.makedirs(output_path, exist_ok=True)

  for name, blobs in net.params.items():
    print('convert layer: ' + name)

    # One file per blob of the layer, numbered in iteration order.
    for index, blob in enumerate(blobs):
      np.save(output_path + '/' + str(name) + '_' + str(index), blob.data)
35 | 
if __name__ == '__main__':
  # Command-line entry point: read the three paths and run the extraction.
  # Every path is mandatory, so each flag is marked required; argparse then
  # reports a missing flag itself instead of None being passed on to Caffe.
  parser = argparse.ArgumentParser()
  parser.add_argument("--model", required=True,
                      help="model prototxt path .prototxt")
  parser.add_argument("--weights", required=True,
                      help="caffe model weights path .caffemodel")
  parser.add_argument("--output", required=True, help="output path")
  args = parser.parse_args()
  extract_caffe_model(args.model, args.weights, args.output)
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 |   TEST: ("coco_2014_minival",)
25 | DATALOADER:
26 |   SIZE_DIVISIBILITY: 32
27 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x"
4 | DATASETS:
5 |   TEST: ("coco_2014_minival",)
6 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 |   TEST: ("coco_2014_minival",)
25 | DATALOADER:
26 |   SIZE_DIVISIBILITY: 32
27 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |   ROI_BOX_HEAD:
21 |     POOLER_RESOLUTION: 7
22 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 |     POOLER_SAMPLING_RATIO: 2
24 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 |     PREDICTOR: "FPNPredictor"
26 | DATASETS:
27 |   TEST: ("coco_2014_minival",)
28 | DATALOADER:
29 |   SIZE_DIVISIBILITY: 32
30 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/37697547/e2e_keypoint_rcnn_R-50-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 2
24 |   ROI_KEYPOINT_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
27 |     PREDICTOR: "KeypointRCNNPredictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 56
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   KEYPOINT_ON: True
33 | DATASETS:
34 |   TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",)
35 |   TEST: ("keypoints_coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
38 | DATALOADER:
39 |   SIZE_DIVISIBILITY: 32
40 | SOLVER:
41 |   BASE_LR: 0.02
42 |   WEIGHT_DECAY: 0.0001
43 |   STEPS: (60000, 80000)
44 |   MAX_ITER: 90000
45 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |   ROI_MASK_HEAD:
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 |     PREDICTOR: "MaskRCNNC4Predictor"
27 |     POOLER_RESOLUTION: 14
28 |     POOLER_SAMPLING_RATIO: 2
29 |     RESOLUTION: 28
30 |     SHARE_BOX_FEATURE_EXTRACTOR: False
31 |   MASK_ON: True
32 | DATASETS:
33 |   TEST: ("coco_2014_minival",)
34 | DATALOADER:
35 |   SIZE_DIVISIBILITY: 32
36 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x"
 4 |   ROI_MASK_HEAD:
 5 |     PREDICTOR: "MaskRCNNC4Predictor"
 6 |     SHARE_BOX_FEATURE_EXTRACTOR: True
 7 |   MASK_ON: True
 8 | DATASETS:
 9 |   TEST: ("coco_2014_minival",)
10 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |   ROI_MASK_HEAD:
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 |     PREDICTOR: "MaskRCNNC4Predictor"
27 |     POOLER_RESOLUTION: 14
28 |     POOLER_SAMPLING_RATIO: 2
29 |     RESOLUTION: 28
30 |     SHARE_BOX_FEATURE_EXTRACTOR: False
31 |   MASK_ON: True
32 | DATASETS:
33 |   TEST: ("coco_2014_minival",)
34 | DATALOADER:
35 |   SIZE_DIVISIBILITY: 32
36 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/37129812/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-152-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |   ROI_BOX_HEAD:
21 |     POOLER_RESOLUTION: 7
22 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 |     POOLER_SAMPLING_RATIO: 2
24 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 |     PREDICTOR: "FPNPredictor"
26 |   ROI_MASK_HEAD:
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
29 |     PREDICTOR: "MaskRCNNC4Predictor"
30 |     POOLER_RESOLUTION: 14
31 |     POOLER_SAMPLING_RATIO: 2
32 |     RESOLUTION: 28
33 |     SHARE_BOX_FEATURE_EXTRACTOR: False
34 |   MASK_ON: True
35 | DATASETS:
36 |   TEST: ("coco_2014_minival",)
37 | DATALOADER:
38 |   SIZE_DIVISIBILITY: 32
39 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |   ROI_BOX_HEAD:
21 |     POOLER_RESOLUTION: 7
22 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 |     POOLER_SAMPLING_RATIO: 2
24 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 |     PREDICTOR: "FPNPredictor"
26 |   ROI_MASK_HEAD:
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
29 |     PREDICTOR: "MaskRCNNC4Predictor"
30 |     POOLER_RESOLUTION: 14
31 |     POOLER_SAMPLING_RATIO: 2
32 |     RESOLUTION: 28
33 |     SHARE_BOX_FEATURE_EXTRACTOR: False
34 |   MASK_ON: True
35 | DATASETS:
36 |   TEST: ("coco_2014_minival",)
37 | DATALOADER:
38 |   SIZE_DIVISIBILITY: 32
39 | 


--------------------------------------------------------------------------------
/configs/cityscapes/e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 9
24 | DATASETS:
25 |   TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",)
26 |   TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",)
27 | DATALOADER:
28 |   SIZE_DIVISIBILITY: 32
29 | SOLVER:
30 |   BASE_LR: 0.01
31 |   WEIGHT_DECAY: 0.0001
32 |   STEPS: (18000,)
33 |   MAX_ITER: 24000
34 | 


--------------------------------------------------------------------------------
/configs/cityscapes/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 9
24 |   ROI_MASK_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
27 |     PREDICTOR: "MaskRCNNC4Predictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 28
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   MASK_ON: True
33 | DATASETS:
34 |   TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",)
35 |   TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",)
36 | DATALOADER:
37 |   SIZE_DIVISIBILITY: 32
38 | SOLVER:
39 |   BASE_LR: 0.01
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (18000,)
42 |   MAX_ITER: 24000
43 | 


--------------------------------------------------------------------------------
/configs/e2e_flickr_det_faster_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNNDet"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |     NMS: 0.4
18 |     DETECTIONS_PER_IMG: 100
19 | 
20 |   VG_ON: True
21 |   RELATION_ON: False
22 | 
23 |   VG:
24 |     FIXED_BACKBONE: True
25 |     FIXED_RPN: True
26 |     FIXED_ROI_HEAD: True
27 |     FG_IOU_THRESHOLD: 0.5
28 |     BG_IOU_THRESHOLD: 0.2
29 |     VOCAB_FILE: "datasets/skip-thoughts/vocab.json"
30 |     SKIP_THROUGH_DATA_DIR: "datasets/skip-thoughts"
31 | 
32 |   ROI_BOX_HEAD:
33 |     POOLER_RESOLUTION: 7
34 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
35 |     POOLER_SAMPLING_RATIO: 2
36 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
37 |     PREDICTOR: "FPNPredictor"
38 |     NUM_CLASSES: 151
39 | 
40 | 
41 | DATASETS:
42 |   TRAIN: ("flickr_train" ,)
43 |   TEST: ("flickr_val" ,)
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 |   NUM_WORKERS: 2
47 | SOLVER:
48 |   BASE_LR: 0.05
49 |   WEIGHT_DECAY: 0.0001
50 |   STEPS: (40000, )
51 |   CHECKPOINT_PERIOD: 4000
52 |   START_SAVE_CHECKPOINT: 4000
53 |   MAX_ITER: 80000
54 | 


--------------------------------------------------------------------------------
/configs/e2e_flickr_faster_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |     NMS: 0.4
18 |     DETECTIONS_PER_IMG: 100
19 | 
20 |   VG_ON: True
21 |   RELATION_ON: False
22 | 
23 |   VG:
24 |     FIXED_BACKBONE: True
25 |     FIXED_RESNET: True
26 |     FIXED_RPN: True
27 |     FIXED_ROI_HEAD: True
28 |     FG_IOU_THRESHOLD: 0.5
29 |     BG_IOU_THRESHOLD: 0.2
30 |     VOCAB_FILE: "datasets/skip-thoughts/vocab.json"
31 |     SKIP_THROUGH_DATA_DIR: "datasets/skip-thoughts"
32 |     RESNET_PARAMS_FILE: './outputs/bottom-up-pretrained/bottomup_pretrained_10_100.pth'
33 |     FIXED_EMBEDDING: False
34 | 
35 |   ROI_BOX_HEAD:
36 |     POOLER_RESOLUTION: 7
37 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
38 |     POOLER_SAMPLING_RATIO: 2
39 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
40 |     PREDICTOR: "FPNPredictor"
41 |     NUM_CLASSES: 151
42 | 
43 | 
44 | DATASETS:
45 |   TRAIN: ("flickr_train" ,)
46 |   TEST: ("flickr_val" ,)
47 | DATALOADER:
48 |   SIZE_DIVISIBILITY: 32
49 |   NUM_WORKERS: 2
50 | SOLVER:
51 |   BASE_LR: 0.05
52 |   WEIGHT_DECAY: 0.0001
53 |   STEPS: (60000, 100000, )
54 |   CHECKPOINT_PERIOD: 5000
55 |   START_SAVE_CHECKPOINT: 5000
56 |   MAX_ITER: 120001
57 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/README.md:
--------------------------------------------------------------------------------
 1 | ### Group Normalization
 2 | 1. [Group Normalization](https://arxiv.org/abs/1803.08494)  
 3 | 2. [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883)  
 4 | 3. [Official code](https://github.com/facebookresearch/Detectron/blob/master/projects/GN/README.md)  
 5 | 
 6 | 
 7 | ### Performance
 8 | |      case                  |    Type      |  lr schd  |  im/gpu | bbox AP | mask AP |
 9 | |----------------------------|:------------:|:---------:|:-------:|:-------:|:-------:|
10 | |   R-50-FPN, GN (paper)     | finetune     |    2x     |   2     |   40.3  |  35.7   |
11 | |   R-50-FPN, GN (this repo) | finetune     |    2x     |   2     |   40.2  |  36.0   |
12 | |   R-50-FPN, GN (paper)     | from scratch |    3x     |   2     |   39.5  |  35.2   |
13 | |   R-50-FPN, GN (this repo) | from scratch |    3x     |   2     |   38.9  |  35.1   |
14 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS: # use GN for backbone
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STRIDE_IN_1X1: False
14 |     TRANS_FUNC: "BottleneckWithGN"
15 |     STEM_FUNC: "StemWithGN"
16 |   FPN:
17 |     USE_GN: True # use GN for FPN
18 |   RPN:
19 |     USE_FPN: True
20 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 |     PRE_NMS_TOP_N_TRAIN: 2000
22 |     PRE_NMS_TOP_N_TEST: 1000
23 |     POST_NMS_TOP_N_TEST: 1000
24 |     FPN_POST_NMS_TOP_N_TEST: 1000
25 |   ROI_HEADS:
26 |     USE_FPN: True
27 |     BATCH_SIZE_PER_IMAGE: 512
28 |     POSITIVE_FRACTION: 0.25
29 |   ROI_BOX_HEAD:
30 |     USE_GN: True # use GN for bbox head
31 |     POOLER_RESOLUTION: 7
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     POOLER_SAMPLING_RATIO: 2
34 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
35 |     PREDICTOR: "FPNPredictor"
36 | DATASETS:
37 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
38 |   TEST: ("coco_2014_minival",)
39 | DATALOADER:
40 |   SIZE_DIVISIBILITY: 32
41 | SOLVER:
42 |   # Assumes 8 GPUs (16 images per batch = 2 images per GPU)
43 |   BASE_LR: 0.02
44 |   WEIGHT_DECAY: 0.0001
45 |   STEPS: (60000, 80000)
46 |   MAX_ITER: 90000
47 |   IMS_PER_BATCH: 16
48 | TEST:
49 |   IMS_PER_BATCH: 8
50 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS: # use GN for backbone
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STRIDE_IN_1X1: False
14 |     TRANS_FUNC: "BottleneckWithGN"
15 |     STEM_FUNC: "StemWithGN"
16 |   FPN:
17 |     USE_GN: True # use GN for FPN
18 |   RPN:
19 |     USE_FPN: True
20 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 |     PRE_NMS_TOP_N_TRAIN: 2000
22 |     PRE_NMS_TOP_N_TEST: 1000
23 |     POST_NMS_TOP_N_TEST: 1000
24 |     FPN_POST_NMS_TOP_N_TEST: 1000
25 |   ROI_HEADS:
26 |     USE_FPN: True
27 |     BATCH_SIZE_PER_IMAGE: 512
28 |     POSITIVE_FRACTION: 0.25
29 |   ROI_BOX_HEAD:
30 |     USE_GN: True # use GN for bbox head
31 |     POOLER_RESOLUTION: 7
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     POOLER_SAMPLING_RATIO: 2
34 |     CONV_HEAD_DIM: 256
35 |     NUM_STACKED_CONVS: 4
36 |     FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
37 |     PREDICTOR: "FPNPredictor"
38 | DATASETS:
39 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
40 |   TEST: ("coco_2014_minival",)
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assumes 8 GPUs (16 images per batch = 2 images per GPU)
45 |   BASE_LR: 0.02
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (60000, 80000)
48 |   MAX_ITER: 90000
49 |   IMS_PER_BATCH: 16
50 | TEST:
51 |   IMS_PER_BATCH: 8
52 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_1x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS: # use GN for backbone
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STRIDE_IN_1X1: False
14 |     TRANS_FUNC: "BottleneckWithGN"
15 |     STEM_FUNC: "StemWithGN"
16 |   FPN:
17 |     USE_GN: True # use GN for FPN
18 |   RPN:
19 |     USE_FPN: True
20 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 |     PRE_NMS_TOP_N_TRAIN: 2000
22 |     PRE_NMS_TOP_N_TEST: 1000
23 |     POST_NMS_TOP_N_TEST: 1000
24 |     FPN_POST_NMS_TOP_N_TEST: 1000
25 |   ROI_HEADS:
26 |     USE_FPN: True
27 |     BATCH_SIZE_PER_IMAGE: 512
28 |     POSITIVE_FRACTION: 0.25
29 |   ROI_BOX_HEAD:
30 |     USE_GN: True # use GN for bbox head
31 |     POOLER_RESOLUTION: 7
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     POOLER_SAMPLING_RATIO: 2
34 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
35 |     PREDICTOR: "FPNPredictor"
36 |   ROI_MASK_HEAD:
37 |     USE_GN: True # use GN for mask head
38 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
39 |     CONV_LAYERS: (256, 256, 256, 256)
40 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
41 |     PREDICTOR: "MaskRCNNC4Predictor"
42 |     POOLER_RESOLUTION: 14
43 |     POOLER_SAMPLING_RATIO: 2
44 |     RESOLUTION: 28
45 |     SHARE_BOX_FEATURE_EXTRACTOR: False
46 |   MASK_ON: True
47 | DATASETS:
48 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
49 |   TEST: ("coco_2014_minival",)
50 | DATALOADER:
51 |   SIZE_DIVISIBILITY: 32
52 | SOLVER:
53 |   # Assumes 8 GPUs (16 images per batch = 2 images per GPU)
54 |   BASE_LR: 0.02
55 |   WEIGHT_DECAY: 0.0001
56 |   STEPS: (60000, 80000)
57 |   MAX_ITER: 90000
58 |   IMS_PER_BATCH: 16
59 | TEST:
60 |   IMS_PER_BATCH: 8
61 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS: # use GN for backbone
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STRIDE_IN_1X1: False
14 |     TRANS_FUNC: "BottleneckWithGN"
15 |     STEM_FUNC: "StemWithGN"
16 |   FPN:
17 |     USE_GN: True # use GN for FPN
18 |   RPN:
19 |     USE_FPN: True
20 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 |     PRE_NMS_TOP_N_TRAIN: 2000
22 |     PRE_NMS_TOP_N_TEST: 1000
23 |     POST_NMS_TOP_N_TEST: 1000
24 |     FPN_POST_NMS_TOP_N_TEST: 1000
25 |   ROI_HEADS:
26 |     USE_FPN: True
27 |     BATCH_SIZE_PER_IMAGE: 512
28 |     POSITIVE_FRACTION: 0.25
29 |   ROI_BOX_HEAD:
30 |     USE_GN: True # use GN for bbox head
31 |     POOLER_RESOLUTION: 7
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     POOLER_SAMPLING_RATIO: 2
34 |     CONV_HEAD_DIM: 256
35 |     NUM_STACKED_CONVS: 4
36 |     FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
37 |     PREDICTOR: "FPNPredictor"
38 |   ROI_MASK_HEAD:
39 |     USE_GN: True # use GN for mask head
40 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
41 |     CONV_LAYERS: (256, 256, 256, 256)
42 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
43 |     PREDICTOR: "MaskRCNNC4Predictor"
44 |     POOLER_RESOLUTION: 14
45 |     POOLER_SAMPLING_RATIO: 2
46 |     RESOLUTION: 28
47 |     SHARE_BOX_FEATURE_EXTRACTOR: False
48 |   MASK_ON: True
49 | DATASETS:
50 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
51 |   TEST: ("coco_2014_minival",)
52 | DATALOADER:
53 |   SIZE_DIVISIBILITY: 32
54 | SOLVER:
55 |   # Assumes 8 GPUs (16 images per batch = 2 images per GPU)
56 |   BASE_LR: 0.02
57 |   WEIGHT_DECAY: 0.0001
58 |   STEPS: (60000, 80000)
59 |   MAX_ITER: 90000
60 |   IMS_PER_BATCH: 16
61 | TEST:
62 |   IMS_PER_BATCH: 8
63 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "" # no pretrained model
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |     FREEZE_CONV_BODY_AT: 0 # train all layers from scratch (nothing frozen)
12 |   RESNETS: # use GN for backbone
13 |     BACKBONE_OUT_CHANNELS: 256
14 |     STRIDE_IN_1X1: False
15 |     TRANS_FUNC: "BottleneckWithGN"
16 |     STEM_FUNC: "StemWithGN"
17 |   FPN:
18 |     USE_GN: True # use GN for FPN
19 |   RPN:
20 |     USE_FPN: True
21 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 |     PRE_NMS_TOP_N_TRAIN: 2000
23 |     PRE_NMS_TOP_N_TEST: 1000
24 |     POST_NMS_TOP_N_TEST: 1000
25 |     FPN_POST_NMS_TOP_N_TEST: 1000
26 |   ROI_HEADS:
27 |     USE_FPN: True
28 |     BATCH_SIZE_PER_IMAGE: 512
29 |     POSITIVE_FRACTION: 0.25
30 |   ROI_BOX_HEAD:
31 |     USE_GN: True # use GN for bbox head
32 |     POOLER_RESOLUTION: 7
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     POOLER_SAMPLING_RATIO: 2
35 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
36 |     PREDICTOR: "FPNPredictor"
37 | DATASETS:
38 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
39 |   TEST: ("coco_2014_minival",)
40 | DATALOADER:
41 |   SIZE_DIVISIBILITY: 32
42 | SOLVER:
43 |   # Assumes 8 GPUs (16 images per batch = 2 images per GPU)
44 |   BASE_LR: 0.02
45 |   WEIGHT_DECAY: 0.0001
46 |   STEPS: (210000, 250000)
47 |   MAX_ITER: 270000
48 |   IMS_PER_BATCH: 16
49 | TEST:
50 |   IMS_PER_BATCH: 8
51 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "" # no pretrained model
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |     FREEZE_CONV_BODY_AT: 0 # train all layers from scratch (nothing frozen)
12 |   RESNETS: # use GN for backbone
13 |     BACKBONE_OUT_CHANNELS: 256
14 |     STRIDE_IN_1X1: False
15 |     TRANS_FUNC: "BottleneckWithGN"
16 |     STEM_FUNC: "StemWithGN"
17 |   FPN:
18 |     USE_GN: True # use GN for FPN
19 |   RPN:
20 |     USE_FPN: True
21 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 |     PRE_NMS_TOP_N_TRAIN: 2000
23 |     PRE_NMS_TOP_N_TEST: 1000
24 |     POST_NMS_TOP_N_TEST: 1000
25 |     FPN_POST_NMS_TOP_N_TEST: 1000
26 |   ROI_HEADS:
27 |     USE_FPN: True
28 |     BATCH_SIZE_PER_IMAGE: 512
29 |     POSITIVE_FRACTION: 0.25
30 |   ROI_BOX_HEAD:
31 |     USE_GN: True # use GN for bbox head
32 |     POOLER_RESOLUTION: 7
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     POOLER_SAMPLING_RATIO: 2
35 |     CONV_HEAD_DIM: 256
36 |     NUM_STACKED_CONVS: 4
37 |     FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
38 |     PREDICTOR: "FPNPredictor"
39 | DATASETS:
40 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
41 |   TEST: ("coco_2014_minival",)
42 | DATALOADER:
43 |   SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 |   # Assumes 8 GPUs (16 images per batch = 2 images per GPU)
46 |   BASE_LR: 0.02
47 |   WEIGHT_DECAY: 0.0001
48 |   STEPS: (210000, 250000)
49 |   MAX_ITER: 270000
50 |   IMS_PER_BATCH: 16
51 | TEST:
52 |   IMS_PER_BATCH: 8
53 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "" # no pretrained model
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |     FREEZE_CONV_BODY_AT: 0 # train all layers from scratch (nothing frozen)
12 |   RESNETS: # use GN for backbone
13 |     BACKBONE_OUT_CHANNELS: 256
14 |     STRIDE_IN_1X1: False
15 |     TRANS_FUNC: "BottleneckWithGN"
16 |     STEM_FUNC: "StemWithGN"
17 |   FPN:
18 |     USE_GN: True # use GN for FPN
19 |   RPN:
20 |     USE_FPN: True
21 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 |     PRE_NMS_TOP_N_TRAIN: 2000
23 |     PRE_NMS_TOP_N_TEST: 1000
24 |     POST_NMS_TOP_N_TEST: 1000
25 |     FPN_POST_NMS_TOP_N_TEST: 1000
26 |   ROI_HEADS:
27 |     USE_FPN: True
28 |     BATCH_SIZE_PER_IMAGE: 512
29 |     POSITIVE_FRACTION: 0.25
30 |   ROI_BOX_HEAD:
31 |     USE_GN: True # use GN for bbox head
32 |     POOLER_RESOLUTION: 7
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     POOLER_SAMPLING_RATIO: 2
35 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
36 |     PREDICTOR: "FPNPredictor"
37 |   ROI_MASK_HEAD:
38 |     USE_GN: True # use GN for mask head
39 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
40 |     CONV_LAYERS: (256, 256, 256, 256)
41 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
42 |     PREDICTOR: "MaskRCNNC4Predictor"
43 |     POOLER_RESOLUTION: 14
44 |     POOLER_SAMPLING_RATIO: 2
45 |     RESOLUTION: 28
46 |     SHARE_BOX_FEATURE_EXTRACTOR: False
47 |   MASK_ON: True
48 | DATASETS:
49 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
50 |   TEST: ("coco_2014_minival",)
51 | DATALOADER:
52 |   SIZE_DIVISIBILITY: 32
53 | SOLVER:
54 |   # Assumes 8 GPUs (16 images per batch = 2 images per GPU)
55 |   BASE_LR: 0.02
56 |   WEIGHT_DECAY: 0.0001
57 |   STEPS: (210000, 250000)
58 |   MAX_ITER: 270000
59 |   IMS_PER_BATCH: 16
60 | TEST:
61 |   IMS_PER_BATCH: 8
62 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "" # no pretrained model
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |     FREEZE_CONV_BODY_AT: 0 # train all layers from scratch (nothing frozen)
12 |   RESNETS: # use GN for backbone
13 |     BACKBONE_OUT_CHANNELS: 256
14 |     STRIDE_IN_1X1: False
15 |     TRANS_FUNC: "BottleneckWithGN"
16 |     STEM_FUNC: "StemWithGN"
17 |   FPN:
18 |     USE_GN: True # use GN for FPN
19 |   RPN:
20 |     USE_FPN: True
21 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 |     PRE_NMS_TOP_N_TRAIN: 2000
23 |     PRE_NMS_TOP_N_TEST: 1000
24 |     POST_NMS_TOP_N_TEST: 1000
25 |     FPN_POST_NMS_TOP_N_TEST: 1000
26 |   ROI_HEADS:
27 |     USE_FPN: True
28 |     BATCH_SIZE_PER_IMAGE: 512
29 |     POSITIVE_FRACTION: 0.25
30 |   ROI_BOX_HEAD:
31 |     USE_GN: True # use GN for bbox head
32 |     POOLER_RESOLUTION: 7
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     POOLER_SAMPLING_RATIO: 2
35 |     CONV_HEAD_DIM: 256
36 |     NUM_STACKED_CONVS: 4
37 |     FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
38 |     PREDICTOR: "FPNPredictor"
39 |   ROI_MASK_HEAD:
40 |     USE_GN: True # use GN for mask head
41 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
42 |     CONV_LAYERS: (256, 256, 256, 256)
43 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
44 |     PREDICTOR: "MaskRCNNC4Predictor"
45 |     POOLER_RESOLUTION: 14
46 |     POOLER_SAMPLING_RATIO: 2
47 |     RESOLUTION: 28
48 |     SHARE_BOX_FEATURE_EXTRACTOR: False
49 |   MASK_ON: True
50 | DATASETS:
51 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
52 |   TEST: ("coco_2014_minival",)
53 | DATALOADER:
54 |   SIZE_DIVISIBILITY: 32
55 | SOLVER:
56 |   # Assumes 8 GPUs (16 images per batch = 2 images per GPU)
57 |   BASE_LR: 0.02
58 |   WEIGHT_DECAY: 0.0001
59 |   STEPS: (210000, 250000)
60 |   MAX_ITER: 270000
61 |   IMS_PER_BATCH: 16
62 | TEST:
63 |   IMS_PER_BATCH: 8
64 | 


--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 300
 7 |     ANCHOR_SIZES: (128, 256, 512)
 8 |   ROI_BOX_HEAD:
 9 |     NUM_CLASSES: 21
10 | DATASETS:
11 |   TRAIN: ("voc_2007_train", "voc_2007_val")
12 |   TEST: ("voc_2007_test",)
13 | SOLVER:
14 |   BASE_LR: 0.001
15 |   WEIGHT_DECAY: 0.0001
16 |   STEPS: (50000, )
17 |   MAX_ITER: 70000
18 |   IMS_PER_BATCH: 1
19 | TEST:
20 |   IMS_PER_BATCH: 1
21 | 


--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 300
 7 |     ANCHOR_SIZES: (128, 256, 512)
 8 |   ROI_BOX_HEAD:
 9 |     NUM_CLASSES: 21
10 | DATASETS:
11 |   TRAIN: ("voc_2007_train", "voc_2007_val")
12 |   TEST: ("voc_2007_test",)
13 | SOLVER:
14 |   BASE_LR: 0.004
15 |   WEIGHT_DECAY: 0.0001
16 |   STEPS: (12500, )
17 |   MAX_ITER: 17500
18 |   IMS_PER_BATCH: 4
19 | TEST:
20 |   IMS_PER_BATCH: 4
21 | 


--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 21
24 |   ROI_MASK_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
27 |     PREDICTOR: "MaskRCNNC4Predictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 28
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   MASK_ON: True
33 | DATASETS:
34 |   TRAIN: ("voc_2012_train_cocostyle",)
35 |   TEST: ("voc_2012_val_cocostyle",)
36 | DATALOADER:
37 |   SIZE_DIVISIBILITY: 32
38 | SOLVER:
39 |   BASE_LR: 0.01
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (18000,)
42 |   MAX_ITER: 24000
43 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 |   ROI_HEADS:
 8 |     BATCH_SIZE_PER_IMAGE: 256
 9 | DATASETS:
10 |   TRAIN: ("coco_2014_minival",)
11 |   TEST: ("coco_2014_minival",)
12 | INPUT:
13 |   MIN_SIZE_TRAIN: (600,)
14 |   MAX_SIZE_TRAIN: 1000
15 |   MIN_SIZE_TEST: 800
16 |   MAX_SIZE_TEST: 1000
17 | SOLVER:
18 |   BASE_LR: 0.005
19 |   WEIGHT_DECAY: 0.0001
20 |   STEPS: (1500,)
21 |   MAX_ITER: 2000
22 |   IMS_PER_BATCH: 2
23 | TEST:
24 |   IMS_PER_BATCH: 2
25 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |     BATCH_SIZE_PER_IMAGE: 256
18 |   ROI_BOX_HEAD:
19 |     POOLER_RESOLUTION: 7
20 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
21 |     POOLER_SAMPLING_RATIO: 2
22 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
23 |     PREDICTOR: "FPNPredictor"
24 | DATASETS:
25 |   TRAIN: ("coco_2014_minival",)
26 |   TEST: ("coco_2014_minival",)
27 | INPUT:
28 |   MIN_SIZE_TRAIN: (600,)
29 |   MAX_SIZE_TRAIN: 1000
30 |   MIN_SIZE_TEST: 800
31 |   MAX_SIZE_TEST: 1000
32 | DATALOADER:
33 |   SIZE_DIVISIBILITY: 32
34 | SOLVER:
35 |   BASE_LR: 0.005
36 |   WEIGHT_DECAY: 0.0001
37 |   STEPS: (1500,)
38 |   MAX_ITER: 2000
39 |   IMS_PER_BATCH: 4
40 | TEST:
41 |   IMS_PER_BATCH: 2
42 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |     BATCH_SIZE_PER_IMAGE: 256
21 |   ROI_BOX_HEAD:
22 |     POOLER_RESOLUTION: 7
23 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
24 |     POOLER_SAMPLING_RATIO: 2
25 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
26 |     PREDICTOR: "FPNPredictor"
27 | DATASETS:
28 |   TRAIN: ("coco_2014_minival",)
29 |   TEST: ("coco_2014_minival",)
30 | INPUT:
31 |   MIN_SIZE_TRAIN: (600,)
32 |   MAX_SIZE_TRAIN: 1000
33 |   MIN_SIZE_TEST: 800
34 |   MAX_SIZE_TEST: 1000
35 | DATALOADER:
36 |   SIZE_DIVISIBILITY: 32
37 | SOLVER:
38 |   BASE_LR: 0.005
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (1500,)
41 |   MAX_ITER: 2000
42 |   IMS_PER_BATCH: 2
43 | TEST:
44 |   IMS_PER_BATCH: 2
45 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_keypoint_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |     BATCH_SIZE_PER_IMAGE: 256
18 |   ROI_BOX_HEAD:
19 |     POOLER_RESOLUTION: 7
20 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
21 |     POOLER_SAMPLING_RATIO: 2
22 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
23 |     PREDICTOR: "FPNPredictor"
24 |     NUM_CLASSES: 2
25 |   ROI_KEYPOINT_HEAD:
26 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
27 |     FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
28 |     PREDICTOR: "KeypointRCNNPredictor"
29 |     POOLER_RESOLUTION: 14
30 |     POOLER_SAMPLING_RATIO: 2
31 |     RESOLUTION: 56
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   KEYPOINT_ON: True
34 | DATASETS:
35 |   TRAIN: ("keypoints_coco_2014_minival",)
36 |   TEST: ("keypoints_coco_2014_minival",)
37 | INPUT:
38 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
39 |   MAX_SIZE_TRAIN: 1000
40 |   MIN_SIZE_TEST: 800
41 |   MAX_SIZE_TEST: 1000
42 | DATALOADER:
43 |   SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (1500,)
48 |   MAX_ITER: 2000
49 |   IMS_PER_BATCH: 4
50 | TEST:
51 |   IMS_PER_BATCH: 2
52 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 |   ROI_HEADS:
 8 |     BATCH_SIZE_PER_IMAGE: 256
 9 |   ROI_MASK_HEAD:
10 |     PREDICTOR: "MaskRCNNC4Predictor"
11 |     SHARE_BOX_FEATURE_EXTRACTOR: True
12 |   MASK_ON: True
13 | DATASETS:
14 |   TRAIN: ("coco_2014_minival",)
15 |   TEST: ("coco_2014_minival",)
16 | INPUT:
17 |   MIN_SIZE_TRAIN: (600,)
18 |   MAX_SIZE_TRAIN: 1000
19 |   MIN_SIZE_TEST: 800
20 |   MAX_SIZE_TEST: 1000
21 | SOLVER:
22 |   BASE_LR: 0.005
23 |   WEIGHT_DECAY: 0.0001
24 |   STEPS: (1500,)
25 |   MAX_ITER: 2000
26 |   IMS_PER_BATCH: 4
27 | TEST:
28 |   IMS_PER_BATCH: 2
29 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |     BATCH_SIZE_PER_IMAGE: 256
18 |   ROI_BOX_HEAD:
19 |     POOLER_RESOLUTION: 7
20 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
21 |     POOLER_SAMPLING_RATIO: 2
22 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
23 |     PREDICTOR: "FPNPredictor"
24 |   ROI_MASK_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
27 |     PREDICTOR: "MaskRCNNC4Predictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 28
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   MASK_ON: True
33 | DATASETS:
34 |   TRAIN: ("coco_2014_minival",)
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (600,)
38 |   MAX_SIZE_TRAIN: 1000
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1000
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   BASE_LR: 0.005
45 |   WEIGHT_DECAY: 0.0001
46 |   STEPS: (1500,)
47 |   MAX_ITER: 2000
48 |   IMS_PER_BATCH: 4
49 | TEST:
50 |   IMS_PER_BATCH: 2
51 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |     BATCH_SIZE_PER_IMAGE: 256
21 |   ROI_BOX_HEAD:
22 |     POOLER_RESOLUTION: 7
23 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
24 |     POOLER_SAMPLING_RATIO: 2
25 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
26 |     PREDICTOR: "FPNPredictor"
27 |   ROI_MASK_HEAD:
28 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
29 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
30 |     PREDICTOR: "MaskRCNNC4Predictor"
31 |     POOLER_RESOLUTION: 14
32 |     POOLER_SAMPLING_RATIO: 2
33 |     RESOLUTION: 28
34 |     SHARE_BOX_FEATURE_EXTRACTOR: False
35 |   MASK_ON: True
36 | DATASETS:
37 |   TRAIN: ("coco_2014_minival",)
38 |   TEST: ("coco_2014_minival",)
39 | INPUT:
40 |   MIN_SIZE_TRAIN: (600,)
41 |   MAX_SIZE_TRAIN: 1000
42 |   MIN_SIZE_TEST: 800
43 |   MAX_SIZE_TEST: 1000
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 |   BASE_LR: 0.005
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (1500,)
50 |   MAX_ITER: 2000
51 |   IMS_PER_BATCH: 2
52 | TEST:
53 |   IMS_PER_BATCH: 2
54 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RPN:
 6 |     PRE_NMS_TOP_N_TEST: 12000
 7 |     POST_NMS_TOP_N_TEST: 2000
 8 | DATASETS:
 9 |   TRAIN: ("coco_2014_minival",)
10 |   TEST: ("coco_2014_minival",)
11 | INPUT:
12 |   MIN_SIZE_TRAIN: (600,)
13 |   MAX_SIZE_TRAIN: 1000
14 |   MIN_SIZE_TEST: 800
15 |   MAX_SIZE_TEST: 1000
16 | SOLVER:
17 |   BASE_LR: 0.005
18 |   WEIGHT_DECAY: 0.0001
19 |   STEPS: (1500,)
20 |   MAX_ITER: 2000
21 |   IMS_PER_BATCH: 4
22 | TEST:
23 |   IMS_PER_BATCH: 2
24 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   BACKBONE:
 6 |     CONV_BODY: "R-50-FPN"
 7 |   RESNETS:
 8 |     BACKBONE_OUT_CHANNELS: 256
 9 |   RPN:
10 |     USE_FPN: True
11 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 2000
14 |     FPN_POST_NMS_TOP_N_TEST: 2000
15 | DATASETS:
16 |   TRAIN: ("coco_2014_minival",)
17 |   TEST: ("coco_2014_minival",)
18 | INPUT:
19 |   MIN_SIZE_TRAIN: (600,)
20 |   MAX_SIZE_TRAIN: 1000
21 |   MIN_SIZE_TEST: 800
22 |   MAX_SIZE_TEST: 1000
23 | DATALOADER:
24 |   SIZE_DIVISIBILITY: 32
25 | SOLVER:
26 |   BASE_LR: 0.005
27 |   WEIGHT_DECAY: 0.0001
28 |   STEPS: (1500,)
29 |   MAX_ITER: 2000
30 |   IMS_PER_BATCH: 4
31 | TEST:
32 |   IMS_PER_BATCH: 2
33 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-101-FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-101-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (800, )
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assumes 4 GPUs
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (120000, 160000)
48 |   MAX_ITER: 180000
49 |   IMS_PER_BATCH: 8
50 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-101-FPN_P5_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-101-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     USE_C5: False
32 |     FG_IOU_THRESHOLD: 0.5
33 |     BG_IOU_THRESHOLD: 0.4
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | INPUT:
38 |   MIN_SIZE_TRAIN: (800, )
39 |   MAX_SIZE_TRAIN: 1333
40 |   MIN_SIZE_TEST: 800
41 |   MAX_SIZE_TEST: 1333
42 | DATALOADER:
43 |   SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 |   # Assumes 4 GPUs
46 |   BASE_LR: 0.005
47 |   WEIGHT_DECAY: 0.0001
48 |   STEPS: (120000, 160000)
49 |   MAX_ITER: 180000
50 |   IMS_PER_BATCH: 8
51 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-50-FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (800,)
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assumes 4 GPUs
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (120000, 160000)
48 |   MAX_ITER: 180000
49 |   IMS_PER_BATCH: 8
50 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-50-FPN_1x_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_minival",)
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (600,)
38 |   MAX_SIZE_TRAIN: 1000
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1000
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   BASE_LR: 0.005
45 |   WEIGHT_DECAY: 0.0001
46 |   STEPS: (3500,)
47 |   MAX_ITER: 4000
48 |   IMS_PER_BATCH: 4
49 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-50-FPN_P5_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     USE_C5: False
32 |     FG_IOU_THRESHOLD: 0.5
33 |     BG_IOU_THRESHOLD: 0.4
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | INPUT:
38 |   MIN_SIZE_TRAIN: (800,)
39 |   MAX_SIZE_TRAIN: 1333
40 |   MIN_SIZE_TEST: 800
41 |   MAX_SIZE_TEST: 1333
42 | DATALOADER:
43 |   SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 |   # Assumes 4 GPUs
46 |   BASE_LR: 0.005
47 |   WEIGHT_DECAY: 0.0001
48 |   STEPS: (120000, 160000)
49 |   MAX_ITER: 180000
50 |   IMS_PER_BATCH: 8
51 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-101-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |     STRIDE_IN_1X1: False
11 |     NUM_GROUPS: 32
12 |     WIDTH_PER_GROUP: 8
13 |   RPN:
14 |     USE_FPN: True
15 |     FG_IOU_THRESHOLD: 0.5
16 |     BG_IOU_THRESHOLD: 0.4
17 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
18 |     PRE_NMS_TOP_N_TRAIN: 2000
19 |     PRE_NMS_TOP_N_TEST: 1000
20 |     POST_NMS_TOP_N_TEST: 1000
21 |     FPN_POST_NMS_TOP_N_TEST: 1000
22 |   ROI_HEADS:
23 |     USE_FPN: True
24 |     BATCH_SIZE_PER_IMAGE: 256
25 |   ROI_BOX_HEAD:
26 |     POOLER_RESOLUTION: 7
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     POOLER_SAMPLING_RATIO: 2
29 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
30 |     PREDICTOR: "FPNPredictor"
31 |   RETINANET:
32 |     SCALES_PER_OCTAVE: 3
33 |     STRADDLE_THRESH: -1
34 |     FG_IOU_THRESHOLD: 0.5
35 |     BG_IOU_THRESHOLD: 0.4
36 | DATASETS:
37 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
38 |   TEST: ("coco_2014_minival",)
39 | INPUT:
40 |   MIN_SIZE_TRAIN: (800, )
41 |   MAX_SIZE_TRAIN: 1333
42 |   MIN_SIZE_TEST: 800
43 |   MAX_SIZE_TEST: 1333
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 |   # Assumes 4 GPUs
48 |   BASE_LR: 0.0025
49 |   WEIGHT_DECAY: 0.0001
50 |   STEPS: (240000, 320000)
51 |   MAX_ITER: 360000
52 |   IMS_PER_BATCH: 4
53 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_VRD_faster_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |     NMS: 0.5
18 |     DETECTIONS_PER_IMG: 128
19 | 
20 | 
21 |   RELATION_ON: True
22 | 
23 |   RELATION:
24 |     MAKE_PAIR_PROPOSAL_CNT: 128
25 |     MAX_PROPOSAL_PAIR: 2048
26 |     PHRASE_POOLED_SIZE: 8
27 |     PHRASE_CLUSTER: True
28 |     APPLY_REGRESSION: True
29 |     USE_DETECTION_RESULT_FOR_RELATION: True
30 |     FIXED_ROI_HEAD: True
31 |     FIXED_RPN: True
32 |     RELATION_CLASS: 51
33 |     NEG_POS_PHRASE_PROP_RATE: 4
34 |     SEPARATED_BACKBONE: True
35 |     TOPK_TRIPLETS: (50, 100, )
36 |     SAMPLE_DETECTION_BOX: False
37 | 
38 |     FEATURE_REFINE:
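39 |       MASSAGE_PASSING: 0  # NOTE(review): key is spelled "MASSAGE" (probably meant "MESSAGE"); keep as-is unless the config defaults are renamed too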
40 |       MP_UNIT_OUTPUT_DIM: 128
41 | 
42 |   ROI_BOX_HEAD:
43 |     POOLER_RESOLUTION: 7
44 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
45 |     POOLER_SAMPLING_RATIO: 2
46 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
47 |     PREDICTOR: "FPNPredictor"
48 |     NUM_CLASSES: 151
49 | 
50 | 
51 | DATASETS:
52 | #  TRAIN: ("vg_150_50_train_fat" ,)
53 |   TRAIN: ("vg_150_50_train_small" ,)
54 | #  TRAIN: ("vg_150_50_train" ,)
55 |   TEST: ("vg_150_50_test_small" ,)
56 | DATALOADER:
57 |   SIZE_DIVISIBILITY: 32
58 |   NUM_WORKERS: 2
59 | SOLVER:
60 |   BASE_LR: 0.001
61 |   WEIGHT_DECAY: 0.0001
62 |   STEPS: (100000, )
63 |   CHECKPOINT_PERIOD: 5000
64 |   START_SAVE_CHECKPOINT: 5000
65 |   MAX_ITER: 200000
66 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_VRD_faster_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 | 
 8 |   RELATION_ON: True
 9 | 
10 |   RELATION:
11 |     MAKE_PAIR_PROPOSAL_CNT: 128
12 |     MAX_PROPOSAL_PAIR: 2048
13 |     PHRASE_POOLED_SIZE: 8
14 |     PHRASE_CLUSTER: True
15 |     APPLY_REGRESSION: True
16 |     USE_DETECTION_RESULT_FOR_RELATION: True
17 |     FIXED_ROI_HEAD: True
18 |     FIXED_RPN: True
19 |     RELATION_CLASS: 51
20 |     NEG_POS_PHRASE_PROP_RATE: 4
21 |     SEPARATED_BACKBONE: True
22 |     TOPK_TRIPLETS: (50, 100, )
23 |     SAMPLE_DETECTION_BOX: False
24 | 
25 |   ROI_BOX_HEAD:
26 |     NUM_CLASSES: 151
27 |     FEATURE_EXTRACTOR: "ResNet50Conv5ROIFeatureExtractorFlatten"
28 |     PREDICTOR: "FastRCNNPredictorFlatten"
29 | 
30 | 
31 | DATASETS:
32 |   TRAIN: ("vg_150_50_train" ,)
33 |   TEST: ("vg_150_50_test_small" ,)
34 | 
35 | 
36 | SOLVER:
37 |   BASE_LR: 0.001
38 |   WEIGHT_DECAY: 0.0001
39 |   STEPS: (21000, )
40 |   CHECKPOINT_PERIOD: 3000
41 |   START_SAVE_CHECKPOINT: 3000
42 |   MAX_ITER: 50400
43 |   IMS_PER_BATCH: 8
44 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_VRD_faster_rcnn_VGG16.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "VGG16"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 | 
 8 |   BACKBONE:
 9 |     CONV_BODY: "VGG16"
10 |     FREEZE_CONV_BODY_AT: 5
11 | 
12 |   RELATION_ON: False
13 | 
14 |   RELATION:
15 |     MAKE_PAIR_PROPOSAL_CNT: 128
16 |     MAX_PROPOSAL_PAIR: 2048
17 |     PHRASE_POOLED_SIZE: 8
18 |     PHRASE_CLUSTER: True
19 |     APPLY_REGRESSION: True
20 |     USE_DETECTION_RESULT_FOR_RELATION: True
21 |     FIXED_ROI_HEAD: True
22 |     FIXED_RPN: True
23 |     RELATION_CLASS: 51
24 |     NEG_POS_PHRASE_PROP_RATE: 4
25 |     SEPARATED_BACKBONE: True
26 |     TOPK_TRIPLETS: (50, 100, )
27 |     SAMPLE_DETECTION_BOX: False
28 | 
29 |   ROI_BOX_HEAD:
30 |     NUM_CLASSES: 151
31 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"  # works like an FPN with a single feature level
32 |     PREDICTOR: "FPNPredictor"
33 | 
34 | 
35 | DATASETS:
36 |   TRAIN: ("vg_150_50_train_fat" ,)
37 |   TEST: ("vg_150_50_test_small" ,)
38 | 
39 | 
40 | SOLVER:
41 |   BASE_LR: 0.001
42 |   WEIGHT_DECAY: 0.0001
43 |   STEPS: (90000, )
44 |   CHECKPOINT_PERIOD: 3000
45 |   START_SAVE_CHECKPOINT: 3000
46 |   MAX_ITER: 200000
47 |   IMS_PER_BATCH: 8
48 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
25 |   TEST: ("coco_2014_val",)
26 | DATALOADER:
27 |   SIZE_DIVISIBILITY: 32
28 | SOLVER:
29 |   BASE_LR: 0.02
30 |   WEIGHT_DECAY: 0.0001
31 |   STEPS: (60000, 80000)
32 |   MAX_ITER: 90000
33 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 | DATASETS:
 8 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
 9 |   TEST: ("coco_2014_minival",)
10 | SOLVER:
11 |   BASE_LR: 0.01
12 |   WEIGHT_DECAY: 0.0001
13 |   STEPS: (120000, 160000)
14 |   MAX_ITER: 180000
15 |   IMS_PER_BATCH: 8
16 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
25 |   TEST: ("coco_2014_minival",)
26 | DATALOADER:
27 |   SIZE_DIVISIBILITY: 32
28 | SOLVER:
29 |   BASE_LR: 0.02
30 |   WEIGHT_DECAY: 0.0001
31 |   STEPS: (60000, 80000)
32 |   MAX_ITER: 90000
33 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RPN:
 7 |     USE_FPN: True
 8 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
 9 |     PRE_NMS_TOP_N_TRAIN: 2000
10 |     PRE_NMS_TOP_N_TEST: 1000
11 |     POST_NMS_TOP_N_TEST: 1000
12 |     FPN_POST_NMS_TOP_N_TEST: 1000
13 |   ROI_HEADS:
14 |     USE_FPN: True
15 |   ROI_BOX_HEAD:
16 |     POOLER_RESOLUTION: 7
17 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
18 |     POOLER_SAMPLING_RATIO: 2
19 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
20 |     PREDICTOR: "FPNPredictor"
21 |   RESNETS:
22 |     BACKBONE_OUT_CHANNELS: 256
23 |     STRIDE_IN_1X1: False
24 |     NUM_GROUPS: 32
25 |     WIDTH_PER_GROUP: 8
26 | DATASETS:
27 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 |   TEST: ("coco_2014_minival",)
29 | DATALOADER:
30 |   SIZE_DIVISIBILITY: 32
31 | SOLVER:
32 |   BASE_LR: 0.01
33 |   WEIGHT_DECAY: 0.0001
34 |   STEPS: (120000, 160000)
35 |   MAX_ITER: 180000
36 |   IMS_PER_BATCH: 8
37 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_fbnet.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "default"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |   RPN:
12 |     ANCHOR_SIZES: (16, 32, 64, 128, 256)
13 |     ANCHOR_STRIDE: (16, )
14 |     BATCH_SIZE_PER_IMAGE: 256
15 |     PRE_NMS_TOP_N_TRAIN: 6000
16 |     PRE_NMS_TOP_N_TEST: 6000
17 |     POST_NMS_TOP_N_TRAIN: 2000
18 |     POST_NMS_TOP_N_TEST: 100
19 |     RPN_HEAD: FBNet.rpn_head
20 |   ROI_HEADS:
21 |     BATCH_SIZE_PER_IMAGE: 512
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 6
24 |     FEATURE_EXTRACTOR: FBNet.roi_head
25 |     NUM_CLASSES: 81
26 | DATASETS:
27 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 |   TEST: ("coco_2014_minival",)
29 | SOLVER:
30 |   BASE_LR: 0.06
31 |   WARMUP_FACTOR: 0.1
32 |   WEIGHT_DECAY: 0.0001
33 |   STEPS: (60000, 80000)
34 |   MAX_ITER: 90000
35 |   IMS_PER_BATCH: 128  # for 8 GPUs
36 | # TEST:
37 | #   IMS_PER_BATCH: 8
38 | INPUT:
39 |   MIN_SIZE_TRAIN: (320, )
40 |   MAX_SIZE_TRAIN: 640
41 |   MIN_SIZE_TEST: 320
42 |   MAX_SIZE_TEST: 640
43 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
44 |   PIXEL_STD: [57.375, 57.12, 58.395]
45 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_fbnet_600.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "default"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |   RPN:
12 |     ANCHOR_SIZES: (32, 64, 128, 256, 512)
13 |     ANCHOR_STRIDE: (16, )
14 |     BATCH_SIZE_PER_IMAGE: 256
15 |     PRE_NMS_TOP_N_TRAIN: 6000
16 |     PRE_NMS_TOP_N_TEST: 6000
17 |     POST_NMS_TOP_N_TRAIN: 2000
18 |     POST_NMS_TOP_N_TEST: 200
19 |     RPN_HEAD: FBNet.rpn_head
20 |   ROI_HEADS:
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 6
24 |     FEATURE_EXTRACTOR: FBNet.roi_head
25 |     NUM_CLASSES: 81
26 | DATASETS:
27 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 |   TEST: ("coco_2014_minival",)
29 | SOLVER:
30 |   BASE_LR: 0.06
31 |   WARMUP_FACTOR: 0.1
32 |   WEIGHT_DECAY: 0.0001
33 |   STEPS: (60000, 80000)
34 |   MAX_ITER: 90000
35 |   IMS_PER_BATCH: 128  # for 8GPUs
36 | # TEST:
37 | #   IMS_PER_BATCH: 8
38 | INPUT:
39 |   MIN_SIZE_TRAIN: (600, )
40 |   MAX_SIZE_TRAIN: 1000
41 |   MIN_SIZE_TEST: 600
42 |   MAX_SIZE_TEST: 1000
43 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
44 |   PIXEL_STD: [57.375, 57.12, 58.395]
45 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_fbnet_chamv1a_600.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "cham_v1a"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |   RPN:
12 |     ANCHOR_SIZES: (32, 64, 128, 256, 512)
13 |     ANCHOR_STRIDE: (16, )
14 |     BATCH_SIZE_PER_IMAGE: 256
15 |     PRE_NMS_TOP_N_TRAIN: 6000
16 |     PRE_NMS_TOP_N_TEST: 6000
17 |     POST_NMS_TOP_N_TRAIN: 2000
18 |     POST_NMS_TOP_N_TEST: 200
19 |     RPN_HEAD: FBNet.rpn_head
20 |   ROI_HEADS:
21 |     BATCH_SIZE_PER_IMAGE: 128
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 6
24 |     FEATURE_EXTRACTOR: FBNet.roi_head
25 |     NUM_CLASSES: 81
26 | DATASETS:
27 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 |   TEST: ("coco_2014_minival",)
29 | SOLVER:
30 |   BASE_LR: 0.045
31 |   WARMUP_FACTOR: 0.1
32 |   WEIGHT_DECAY: 0.0001
33 |   STEPS: (90000, 120000)
34 |   MAX_ITER: 135000
35 |   IMS_PER_BATCH: 96  # for 8GPUs
36 | # TEST:
37 | #   IMS_PER_BATCH: 8
38 | INPUT:
39 |   MIN_SIZE_TRAIN: (600, )
40 |   MAX_SIZE_TRAIN: 1000
41 |   MIN_SIZE_TEST: 600
42 |   MAX_SIZE_TEST: 1000
43 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
44 |   PIXEL_STD: [57.375, 57.12, 58.395]
45 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_keypoint_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 2
24 |   ROI_KEYPOINT_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
27 |     PREDICTOR: "KeypointRCNNPredictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 56
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   KEYPOINT_ON: True
33 | DATASETS:
34 |   TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",)
35 |   TEST: ("keypoints_coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
38 | DATALOADER:
39 |   SIZE_DIVISIBILITY: 32
40 | SOLVER:
41 |   BASE_LR: 0.02
42 |   WEIGHT_DECAY: 0.0001
43 |   STEPS: (60000, 80000)
44 |   MAX_ITER: 90000
45 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |   ROI_MASK_HEAD:
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 |     PREDICTOR: "MaskRCNNC4Predictor"
27 |     POOLER_RESOLUTION: 14
28 |     POOLER_SAMPLING_RATIO: 2
29 |     RESOLUTION: 28
30 |     SHARE_BOX_FEATURE_EXTRACTOR: False
31 |   MASK_ON: True
32 | DATASETS:
33 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
34 |   TEST: ("coco_2014_minival",)
35 | DATALOADER:
36 |   SIZE_DIVISIBILITY: 32
37 | SOLVER:
38 |   BASE_LR: 0.02
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (60000, 80000)
41 |   MAX_ITER: 90000
42 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 |   ROI_MASK_HEAD:
 8 |     PREDICTOR: "MaskRCNNC4Predictor"
 9 |     SHARE_BOX_FEATURE_EXTRACTOR: True
10 |   MASK_ON: True
11 | DATASETS:
12 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
13 |   TEST: ("coco_2014_minival",)
14 | SOLVER:
15 |   BASE_LR: 0.01
16 |   WEIGHT_DECAY: 0.0001
17 |   STEPS: (120000, 160000)
18 |   MAX_ITER: 180000
19 |   IMS_PER_BATCH: 8
20 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |   ROI_MASK_HEAD:
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 |     PREDICTOR: "MaskRCNNC4Predictor"
27 |     POOLER_RESOLUTION: 14
28 |     POOLER_SAMPLING_RATIO: 2
29 |     RESOLUTION: 28
30 |     SHARE_BOX_FEATURE_EXTRACTOR: False
31 |   MASK_ON: True
32 | DATASETS:
33 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
34 |   TEST: ("coco_2014_minival",)
35 | DATALOADER:
36 |   SIZE_DIVISIBILITY: 32
37 | SOLVER:
38 |   BASE_LR: 0.02
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (60000, 80000)
41 |   MAX_ITER: 90000
42 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |   ROI_BOX_HEAD:
21 |     POOLER_RESOLUTION: 7
22 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 |     POOLER_SAMPLING_RATIO: 2
24 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 |     PREDICTOR: "FPNPredictor"
26 |   ROI_MASK_HEAD:
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
29 |     PREDICTOR: "MaskRCNNC4Predictor"
30 |     POOLER_RESOLUTION: 14
31 |     POOLER_SAMPLING_RATIO: 2
32 |     RESOLUTION: 28
33 |     SHARE_BOX_FEATURE_EXTRACTOR: False
34 |   MASK_ON: True
35 | DATASETS:
36 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
37 |   TEST: ("coco_2014_minival",)
38 | DATALOADER:
39 |   SIZE_DIVISIBILITY: 32
40 | SOLVER:
41 |   BASE_LR: 0.01
42 |   WEIGHT_DECAY: 0.0001
43 |   STEPS: (120000, 160000)
44 |   MAX_ITER: 180000
45 |   IMS_PER_BATCH: 8
46 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_fbnet.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "default"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |     DET_HEAD_LAST_SCALE: 0.0
12 |   RPN:
13 |     ANCHOR_SIZES: (16, 32, 64, 128, 256)
14 |     ANCHOR_STRIDE: (16, )
15 |     BATCH_SIZE_PER_IMAGE: 256
16 |     PRE_NMS_TOP_N_TRAIN: 6000
17 |     PRE_NMS_TOP_N_TEST: 6000
18 |     POST_NMS_TOP_N_TRAIN: 2000
19 |     POST_NMS_TOP_N_TEST: 100
20 |     RPN_HEAD: FBNet.rpn_head
21 |   ROI_HEADS:
22 |     BATCH_SIZE_PER_IMAGE: 256
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 6
25 |     FEATURE_EXTRACTOR: FBNet.roi_head
26 |     NUM_CLASSES: 81
27 |   ROI_MASK_HEAD:
28 |     POOLER_RESOLUTION: 6
29 |     FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 |     PREDICTOR: "MaskRCNNConv1x1Predictor"
31 |     RESOLUTION: 12
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   MASK_ON: True
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | SOLVER:
38 |   BASE_LR: 0.06
39 |   WARMUP_FACTOR: 0.1
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (60000, 80000)
42 |   MAX_ITER: 90000
43 |   IMS_PER_BATCH: 128  # for 8GPUs
44 | # TEST:
45 | #   IMS_PER_BATCH: 8
46 | INPUT:
47 |   MIN_SIZE_TRAIN: (320, )
48 |   MAX_SIZE_TRAIN: 640
49 |   MIN_SIZE_TEST: 320
50 |   MAX_SIZE_TEST: 640
51 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
52 |   PIXEL_STD: [57.375, 57.12, 58.395]
53 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_fbnet_600.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "default"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |     DET_HEAD_LAST_SCALE: 0.0
12 |   RPN:
13 |     ANCHOR_SIZES: (32, 64, 128, 256, 512)
14 |     ANCHOR_STRIDE: (16, )
15 |     BATCH_SIZE_PER_IMAGE: 256
16 |     PRE_NMS_TOP_N_TRAIN: 6000
17 |     PRE_NMS_TOP_N_TEST: 6000
18 |     POST_NMS_TOP_N_TRAIN: 2000
19 |     POST_NMS_TOP_N_TEST: 200
20 |     RPN_HEAD: FBNet.rpn_head
21 |   ROI_HEADS:
22 |     BATCH_SIZE_PER_IMAGE: 256
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 6
25 |     FEATURE_EXTRACTOR: FBNet.roi_head
26 |     NUM_CLASSES: 81
27 |   ROI_MASK_HEAD:
28 |     POOLER_RESOLUTION: 6
29 |     FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 |     PREDICTOR: "MaskRCNNConv1x1Predictor"
31 |     RESOLUTION: 12
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   MASK_ON: True
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | SOLVER:
38 |   BASE_LR: 0.06
39 |   WARMUP_FACTOR: 0.1
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (60000, 80000)
42 |   MAX_ITER: 90000
43 |   IMS_PER_BATCH: 128  # for 8GPUs
44 | # TEST:
45 | #   IMS_PER_BATCH: 8
46 | INPUT:
47 |   MIN_SIZE_TRAIN: (600, )
48 |   MAX_SIZE_TRAIN: 1000
49 |   MIN_SIZE_TEST: 600
50 |   MAX_SIZE_TEST: 1000
51 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
52 |   PIXEL_STD: [57.375, 57.12, 58.395]
53 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "xirb16d_dsmask"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |     DET_HEAD_LAST_SCALE: -1.0
12 |   RPN:
13 |     ANCHOR_SIZES: (16, 32, 64, 128, 256)
14 |     ANCHOR_STRIDE: (16, )
15 |     BATCH_SIZE_PER_IMAGE: 256
16 |     PRE_NMS_TOP_N_TRAIN: 6000
17 |     PRE_NMS_TOP_N_TEST: 6000
18 |     POST_NMS_TOP_N_TRAIN: 2000
19 |     POST_NMS_TOP_N_TEST: 100
20 |     RPN_HEAD: FBNet.rpn_head
21 |   ROI_HEADS:
22 |     BATCH_SIZE_PER_IMAGE: 512
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 6
25 |     FEATURE_EXTRACTOR: FBNet.roi_head
26 |     NUM_CLASSES: 81
27 |   ROI_MASK_HEAD:
28 |     POOLER_RESOLUTION: 6
29 |     FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 |     PREDICTOR: "MaskRCNNConv1x1Predictor"
31 |     RESOLUTION: 12
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   MASK_ON: True
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | SOLVER:
38 |   BASE_LR: 0.06
39 |   WARMUP_FACTOR: 0.1
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (60000, 80000)
42 |   MAX_ITER: 90000
43 |   IMS_PER_BATCH: 128  # for 8GPUs
44 | # TEST:
45 | #   IMS_PER_BATCH: 8
46 | INPUT:
47 |   MIN_SIZE_TRAIN: (320, )
48 |   MAX_SIZE_TRAIN: 640
49 |   MIN_SIZE_TEST: 320
50 |   MAX_SIZE_TEST: 640
51 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
52 |   PIXEL_STD: [57.375, 57.12, 58.395]
53 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "xirb16d_dsmask"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |     DET_HEAD_LAST_SCALE: 0.0
12 |   RPN:
13 |     ANCHOR_SIZES: (32, 64, 128, 256, 512)
14 |     ANCHOR_STRIDE: (16, )
15 |     BATCH_SIZE_PER_IMAGE: 256
16 |     PRE_NMS_TOP_N_TRAIN: 6000
17 |     PRE_NMS_TOP_N_TEST: 6000
18 |     POST_NMS_TOP_N_TRAIN: 2000
19 |     POST_NMS_TOP_N_TEST: 200
20 |     RPN_HEAD: FBNet.rpn_head
21 |   ROI_HEADS:
22 |     BATCH_SIZE_PER_IMAGE: 256
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 6
25 |     FEATURE_EXTRACTOR: FBNet.roi_head
26 |     NUM_CLASSES: 81
27 |   ROI_MASK_HEAD:
28 |     POOLER_RESOLUTION: 6
29 |     FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 |     PREDICTOR: "MaskRCNNConv1x1Predictor"
31 |     RESOLUTION: 12
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   MASK_ON: True
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | SOLVER:
38 |   BASE_LR: 0.06
39 |   WARMUP_FACTOR: 0.1
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (60000, 80000)
42 |   MAX_ITER: 90000
43 |   IMS_PER_BATCH: 128  # for 8GPUs
44 | # TEST:
45 | #   IMS_PER_BATCH: 8
46 | INPUT:
47 |   MIN_SIZE_TRAIN: (600, )
48 |   MAX_SIZE_TRAIN: 1000
49 |   MIN_SIZE_TEST: 600
50 |   MAX_SIZE_TEST: 1000
51 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
52 |   PIXEL_STD: [57.375, 57.12, 58.395]
53 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/rpn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   RPN_ONLY: True
 5 |   BACKBONE:
 6 |     CONV_BODY: "R-101-FPN"
 7 |   RESNETS:
 8 |     BACKBONE_OUT_CHANNELS: 256
 9 |   RPN:
10 |     USE_FPN: True
11 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 2000
14 |     FPN_POST_NMS_TOP_N_TEST: 2000
15 | DATASETS:
16 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
17 |   TEST: ("coco_2014_minival",)
18 | DATALOADER:
19 |   SIZE_DIVISIBILITY: 32
20 | SOLVER:
21 |   BASE_LR: 0.02
22 |   WEIGHT_DECAY: 0.0001
23 |   STEPS: (60000, 80000)
24 |   MAX_ITER: 90000
25 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/rpn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RPN:
 6 |     PRE_NMS_TOP_N_TEST: 12000
 7 |     POST_NMS_TOP_N_TEST: 2000
 8 | DATASETS:
 9 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
10 |   TEST: ("coco_2014_minival",)
11 | SOLVER:
12 |   BASE_LR: 0.02
13 |   WEIGHT_DECAY: 0.0001
14 |   STEPS: (60000, 80000)
15 |   MAX_ITER: 90000
16 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/rpn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   BACKBONE:
 6 |     CONV_BODY: "R-50-FPN"
 7 |   RESNETS:
 8 |     BACKBONE_OUT_CHANNELS: 256
 9 |   RPN:
10 |     USE_FPN: True
11 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 2000
14 |     FPN_POST_NMS_TOP_N_TEST: 2000
15 | DATASETS:
16 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
17 |   TEST: ("coco_2014_minival",)
18 | DATALOADER:
19 |   SIZE_DIVISIBILITY: 32
20 | SOLVER:
21 |   BASE_LR: 0.02
22 |   WEIGHT_DECAY: 0.0001
23 |   STEPS: (60000, 80000)
24 |   MAX_ITER: 90000
25 | 


--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/rpn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   RPN_ONLY: True
 5 |   BACKBONE:
 6 |     CONV_BODY: "R-101-FPN"
 7 |   RESNETS:
 8 |     BACKBONE_OUT_CHANNELS: 256
 9 |     STRIDE_IN_1X1: False
10 |     NUM_GROUPS: 32
11 |     WIDTH_PER_GROUP: 8
12 |   RPN:
13 |     USE_FPN: True
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 2000
17 |     FPN_POST_NMS_TOP_N_TEST: 2000
18 | DATASETS:
19 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
20 |   TEST: ("coco_2014_minival",)
21 | DATALOADER:
22 |   SIZE_DIVISIBILITY: 32
23 | SOLVER:
24 |   BASE_LR: 0.02
25 |   WEIGHT_DECAY: 0.0001
26 |   STEPS: (60000, 80000)
27 |   MAX_ITER: 90000
28 | 


--------------------------------------------------------------------------------
/data_analysis/fast_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | 
9 | from data_analysis.fast_rcnn import nms_wrapper


--------------------------------------------------------------------------------
/data_analysis/fast_rcnn/bbox_transform_pytorch.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def bbox_transform_inv(boxes, deltas):
 5 |     widths = boxes[:, 2] - boxes[:, 0] + 1.0
 6 |     heights = boxes[:, 3] - boxes[:, 1] + 1.0
 7 |     ctr_x = boxes[:, 0] + 0.5 * widths
 8 |     ctr_y = boxes[:, 1] + 0.5 * heights
 9 | 
10 |     dx = deltas[:, 0]
11 |     dy = deltas[:, 1]
12 |     dw = deltas[:, 2]
13 |     dh = deltas[:, 3]
14 | 
15 |     pred_ctr_x = dx * widths + ctr_x
16 |     pred_ctr_y = dy * heights + ctr_y
17 |     pred_w = torch.exp(dw) * widths
18 |     pred_h = torch.exp(dh) * heights
19 | 
20 |     pred_boxes = deltas.clone()
21 |     # x1
22 |     pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
23 |     # y1
24 |     pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
25 |     # x2
26 |     pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
27 |     # y2
28 |     pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
29 | 
30 |     return pred_boxes
31 | 
32 | 
def clip_boxes(boxes, im_shape):
    """Return a copy of ``boxes`` with every coordinate clamped to the image.

    Columns are handled in groups of four: offsets 0 and 2 are limited to
    ``[0, im_shape[1] - 1]`` and offsets 1 and 3 to ``[0, im_shape[0] - 1]``.
    The input tensor is left untouched.
    """
    x_limit = im_shape[1] - 1
    y_limit = im_shape[0] - 1
    clipped = boxes.clone()
    # Offsets 0..3 within each group of four alternate between the x and y limit.
    for offset, upper in enumerate((x_limit, y_limit, x_limit, y_limit)):
        clipped[:, offset::4] = boxes[:, offset::4].clamp(0, upper)
    return clipped
41 | 
def clip_rois(boxes, im_shape):
    """Return a copy of ``boxes`` with ROI coordinates clamped to the image.

    Columns are handled in groups of five. Offset 0 of each group is copied
    through unchanged (presumably a batch/image index -- confirm with the
    caller). Offsets 1 and 3 are limited to ``[0, im_shape[1] - 1]`` and
    offsets 2 and 4 to ``[0, im_shape[0] - 1]``.
    """
    x_limit = im_shape[1] - 1
    y_limit = im_shape[0] - 1
    clipped = boxes.clone()
    # Coordinates start at offset 1 of each five-wide group.
    for offset, upper in enumerate((x_limit, y_limit, x_limit, y_limit), start=1):
        clipped[:, offset::5] = boxes[:, offset::5].clamp(0, upper)
    return clipped


--------------------------------------------------------------------------------
/data_analysis/fast_rcnn/nms_wrapper.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | from data_analysis.nms.cpu_nms import cpu_nms
 9 | from data_analysis.nms.gpu_nms import gpu_nms
10 | 
11 | from .config import cfg
12 | 
13 | 
def nms(dets, thresh, force_cpu=False):
    """Run non-maximum suppression through the CPU or the GPU backend.

    An input with zero rows short-circuits to an empty list. Otherwise the
    GPU kernel is used when ``cfg.USE_GPU_NMS`` is set and ``force_cpu`` is
    false; in every other case the CPU implementation runs.
    """
    if dets.shape[0] == 0:
        return []

    use_gpu = cfg.USE_GPU_NMS and not force_cpu
    if not use_gpu:
        return cpu_nms(dets, thresh)
    return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
23 | 


--------------------------------------------------------------------------------
/data_analysis/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 | 


--------------------------------------------------------------------------------
/data_analysis/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/nms/__init__.py


--------------------------------------------------------------------------------
/data_analysis/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | cimport numpy as np
10 | 
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    # C-level float32 maximum; returns `a` on ties, `b` whenever `a >= b` is false.
    if a >= b:
        return a
    return b
13 | 
cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    # C-level float32 minimum; returns `a` on ties, `b` whenever `a <= b` is false.
    if a <= b:
        return a
    return b
16 | 
def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
    """Greedy non-maximum suppression over float32 detections.

    Columns 0..3 of ``dets`` are read as x1, y1, x2, y2 and column 4 as the
    score. Boxes are visited in descending score order; a lower-scoring box
    is suppressed when its overlap ratio (intersection over union, using
    inclusive ``+ 1`` extents) with a kept box is ``>= thresh``.

    Returns:
        A Python list of row indices into ``dets`` for the kept boxes, in
        descending score order.
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort yields intp indices; np.int_t (C long) does not match that on
    # LLP64 platforms, so the buffer is typed as intp.
    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # np.int was removed from NumPy (1.24+); np.intp matches the buffer type.
    cdef np.ndarray[np.intp_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.intp)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        # Only boxes ranked after box i can still be suppressed by it.
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
69 | 


--------------------------------------------------------------------------------
/data_analysis/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
// GPU non-maximum-suppression entry point (implementation lives outside
// this header).
// NOTE(review): parameter meanings below are inferred from the names and
// from how the Cython wrapper gpu_nms calls this function -- confirm against
// the kernel source:
//   keep_out           output buffer; the wrapper allocates boxes_num ints
//   num_out            output count; the wrapper reads keep_out[0 .. *num_out)
//   boxes_host         boxes_num x boxes_dim float matrix, passed already
//                      sorted by descending score
//   nms_overlap_thresh overlap threshold
//   device_id          GPU device to run on
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id);
3 | 


--------------------------------------------------------------------------------
/data_analysis/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Faster R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | cimport numpy as np
10 | 
# The extern below is declared with np.int32_t* for its first argument while
# the header named in it is a C/C++ one; presumably that header takes a plain
# int*, which is only compatible when both types have the same size.
assert sizeof(int) == sizeof(np.int32_t)

# Native entry point declared in gpu_nms.hpp. As called from gpu_nms, the
# arguments are: keep buffer, output count, box data, number of boxes,
# columns per box, overlap threshold, device id.
cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 | 
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
            np.int32_t device_id=0):
    """Run non-maximum suppression through the native ``_nms`` routine.

    ``dets`` is sorted by descending score (column 4) before being handed to
    ``_nms``; the indices it reports are mapped back through that ordering so
    the result refers to rows of the original ``dets``.

    Returns:
        A list of kept row indices into ``dets``; an empty list when ``dets``
        has no rows.
    """
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    # argsort yields intp indices; np.int_t (C long) does not match that on
    # LLP64 platforms, so the buffer is typed as intp.
    cdef np.ndarray[np.intp_t, ndim=1] \
        order = scores.argsort()[::-1]
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    # With no boxes there is no first element to take the address of below,
    # and nothing to suppress.
    if boxes_num == 0:
        return []
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    # Only the first num_out entries of keep are filled in by _nms.
    keep = keep[:num_out]
    return list(order[keep])
32 | 


--------------------------------------------------------------------------------
/data_analysis/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | 
def py_cpu_nms(dets, thresh):
    """Reference NumPy implementation of greedy non-maximum suppression.

    Columns 0..3 of ``dets`` are read as x1, y1, x2, y2 and column 4 as the
    score. The highest-scoring remaining box is kept on each pass, and only
    boxes whose overlap ratio with it is ``<= thresh`` stay in the running.
    Returns the kept row indices in descending score order.
    """
    left, top, right, bottom, scores = (dets[:, col] for col in range(5))

    # Inclusive pixel extents, hence the + 1.
    areas = (right - left + 1) * (bottom - top + 1)
    remaining = scores.argsort()[::-1]

    selected = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        selected.append(best)

        # Intersection rectangle between the kept box and every other candidate.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])

        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        inter = inter_w * inter_h
        overlap = inter / (areas[best] + areas[rest] - inter)

        survivors = np.where(overlap <= thresh)[0]
        remaining = rest[survivors]

    return selected
39 | 


--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/embed_ba_0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/embed_ba_0.npy


--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/embed_ba_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/embed_ba_1.npy


--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/lstm1_0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/lstm1_0.npy


--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/lstm1_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/lstm1_1.npy


--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/lstm1_2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/lstm1_2.npy


--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/query_bbox_pred_0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/query_bbox_pred_0.npy


--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/query_bbox_pred_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/query_bbox_pred_1.npy


--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/query_score_fc_0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/query_score_fc_0.npy


--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/query_score_fc_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/query_score_fc_1.npy


--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/qv_fc1_0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/qv_fc1_0.npy


--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/qv_fc1_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/qv_fc1_1.npy


--------------------------------------------------------------------------------
/data_analysis/sng_parser/__init__.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | # File   : __init__.py
 4 | # Author : Jiayuan Mao
 5 | # Email  : maojiayuan@gmail.com
 6 | # Date   : 08/21/2018
 7 | #
 8 | # This file is part of SceneGraphParser.
 9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 | 
12 | from .parser import *
13 | from .backends import *
14 | from .utils import *
15 | 
16 | __version__ = (0, 1, 0)
17 | __author__ = 'Jiayuan Mao'
18 | __email__ = 'maojiayuan@gmail.com'
19 | 
20 | 


--------------------------------------------------------------------------------
/data_analysis/sng_parser/_data/phrasal-preps.txt:
--------------------------------------------------------------------------------
 1 | in addition to
 2 | in front of
 3 | in reference to
 4 | in regard to
 5 | in spite of
 6 | on account of
 7 | on top of
 8 | on side of
 9 | on the side of
10 | with regard to
11 | 
12 | 


--------------------------------------------------------------------------------
/data_analysis/sng_parser/backends/__init__.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | # File   : __init__.py
 4 | # Author : Jiayuan Mao
 5 | # Email  : maojiayuan@gmail.com
 6 | # Date   : 08/21/2018
 7 | #
 8 | # This file is part of SceneGraphParser.
 9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 | 
12 | from .spacy_parser import *
13 | 
14 | 


--------------------------------------------------------------------------------
/data_analysis/sng_parser/backends/backend.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | # File   : backend.py
 4 | # Author : Jiayuan Mao
 5 | # Email  : maojiayuan@gmail.com
 6 | # Date   : 08/21/2018
 7 | #
 8 | # This file is part of SceneGraphParser.
 9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 | 
12 | 
class ParserBackend(object):
    """
    Base class for all parser backends. This class
    specifies the methods that should be overridden by subclasses.
    """

    def parse(self, sentence):
        """Parse ``sentence``; this base implementation always raises
        NotImplementedError and must be overridden by subclasses."""
        raise NotImplementedError()
21 | 
22 | 


--------------------------------------------------------------------------------
/data_analysis/sng_parser/database.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | # File   : database.py
 4 | # Author : Jiayuan Mao
 5 | # Email  : maojiayuan@gmail.com
 6 | # Date   : 08/23/2018
 7 | #
 8 | # This file is part of SceneGraphParser.
 9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 | 
12 | import os.path as osp
13 | 
14 | 
# Cache of word lists that have already been read, keyed by file name.
_caches = dict()


def load_list(filename):
    """Return the set of non-empty, stripped lines of a bundled data file.

    ``filename`` is resolved inside the ``_data`` directory located next to
    this module. The result is stored in ``_caches``, so each file is read at
    most once and later calls return the very same ``set`` object.
    """
    if filename not in _caches:
        path = osp.join(osp.dirname(__file__), '_data', filename)
        # Context manager: close the handle deterministically instead of
        # leaving it open until the garbage collector gets to it.
        with open(path) as f:
            stripped = (line.strip() for line in f)
            _caches[filename] = {line for line in stripped if line}
    return _caches[filename]
27 | 
28 | 
def is_phrasal_verb(verb):
    """Return True if ``verb`` is an entry of ``phrasal-verbs.txt``."""
    known_verbs = load_list('phrasal-verbs.txt')
    return verb in known_verbs
31 | 
32 | 
def is_phrasal_prep(prep):
    """Return True if ``prep`` is an entry of ``phrasal-preps.txt``."""
    known_preps = load_list('phrasal-preps.txt')
    return prep in known_preps
35 | 
36 | 
def is_scene_noun(noun):
    """Return True if ``noun``, or its last space-separated word, is an
    entry of ``scene-nouns.txt``."""
    last_word = noun.split(' ')[-1]
    scene_nouns = load_list('scene-nouns.txt')
    return any(candidate in scene_nouns for candidate in (noun, last_word))
41 | 
42 | 


--------------------------------------------------------------------------------
/data_analysis/sng_parser/utils.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | # File   : utils.py
 4 | # Author : Jiayuan Mao
 5 | # Email  : maojiayuan@gmail.com
 6 | # Date   : 08/21/2018
 7 | #
 8 | # This file is part of SceneGraphParser.
 9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 | 
12 | import functools
13 | import tabulate
14 | 
15 | 
16 | __all__ = ['tprint']
17 | 
18 | 
def tprint(graph, file=None, show_entities=True, show_relations=True):
    """
    Write a tabular summary of a parsed scene graph.

    Args:
        graph: mapping with an ``'entities'`` list (each item providing
            ``'head'``, ``'span'`` and ``'modifiers'``) and a ``'relations'``
            list (each item providing ``'subject'``, ``'relation'`` and
            ``'object'``, where subject/object index into the entities).
        file: stream passed to ``print``; ``None`` means standard output.
        show_entities: emit the entity table when true.
        show_relations: emit the relation table when true.
    """

    def emit(text):
        print(text, file=file)

    if show_entities:
        emit('Entities:')

        entity_rows = []
        for entity in graph['entities']:
            head = entity['head'].lower()
            span = entity['span'].lower()
            modifier_spans = [m['span'].lower() for m in entity['modifiers']]
            entity_rows.append([head, span, ','.join(modifier_spans)])

        emit(tabulate.tabulate(
            entity_rows,
            headers=['Head', 'Span', 'Modifiers'],
            tablefmt=_tabulate_format,
        ))

    if show_relations:
        emit('Relations:')

        all_entities = graph['entities']
        relation_rows = []
        for relation in graph['relations']:
            subject_head = all_entities[relation['subject']]['head'].lower()
            predicate = relation['relation'].lower()
            object_head = all_entities[relation['object']]['head'].lower()
            relation_rows.append([subject_head, predicate, object_head])

        emit(tabulate.tabulate(
            relation_rows,
            headers=['Subject', 'Relation', 'Object'],
            tablefmt=_tabulate_format,
        ))
49 | 
50 | 
# Table layout shared by both tables printed by tprint: a "+---+" rule above
# and below the table, a "|---+---|" rule under the header row, "|"-delimited
# header and data cells, one space of cell padding and no rule between rows.
_tabulate_format = tabulate.TableFormat(
        lineabove=tabulate.Line("+", "-", "+", "+"),
        linebelowheader=tabulate.Line("|", "-", "+", "|"),
        linebetweenrows=None,
        linebelow=tabulate.Line("+", "-", "+", "+"),
        headerrow=tabulate.DataRow("|", "|", "|"),
        datarow=tabulate.DataRow("|", "|", "|"),
        padding=1, with_header_hide=None
)
60 | 


--------------------------------------------------------------------------------
/data_analysis/tools/.gitignore:
--------------------------------------------------------------------------------
1 | /*.txt
2 | 
3 | 


--------------------------------------------------------------------------------
/data_analysis/tools/get-phrasal-verbs.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # get-phrasal-verbs.sh
 4 | # Copyright (C) 2018 Jiayuan Mao <maojiayuan@gmail.com>
 5 | #
 6 | # Distributed under terms of the MIT license.
 7 | 
 8 | set -x
 9 | 
10 | rm -f ../sng_parser/_data/phrasal-verbs.txt
11 | 
12 | for alpha in {A..Z}
13 | do
14 |     echo curling $alpha
15 |     curl \
16 |         -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.89 Safari/537.36" \
17 |         http://www.english-for-students.com/Phrasal-Verbs-$alpha.html | python3 parse-eos.py >> ../sng_parser/_data/phrasal-verbs.txt
18 |     sleep 3
19 | done
20 | 
21 | 


--------------------------------------------------------------------------------
/data_analysis/tools/get-scene-nouns.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # get-scene-nouns.sh
 4 | # Copyright (C) 2018 Jiayuan Mao <maojiayuan@gmail.com>
 5 | #
 6 | # Distributed under terms of the MIT license.
 7 | #
 8 | 
 9 | set -x
10 | 
11 | curl https://raw.githubusercontent.com/CSAILVision/places365/master/IO_places365.txt | \
12 |     python3 process-scene-nouns.py > \
13 |     ../sng_parser/_data/scene-nouns.txt
14 | 


--------------------------------------------------------------------------------
/data_analysis/tools/parse-eos.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | # File   : parse-eos.py
 4 | # Author : Jiayuan Mao
 5 | # Email  : maojiayuan@gmail.com
 6 | # Date   : 08/23/2018
 7 | #
 8 | # This file is part of SceneGraphParser.
 9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 | 
12 | import re
13 | 
14 | 
def main():
    """Extract the phrasal verbs of one letter from an HTML page on stdin.

    Every ``<b eza...>...</b>`` element is collected. The first character of
    the element two thirds of the way through the list is taken as the page's
    letter; lower-cased entries starting with that letter are printed to
    stdout and all other entries are reported on stderr.

    Exits with status 1 when the page contains no such element.
    """
    import sys

    text = '\n'.join(sys.stdin.readlines())
    # Raw string: "\s", "\S" and "\/" are regex escapes, not string escapes.
    groups = re.findall(r"<b eza[\s\S]*?>(.*?)<\/b>", text, re.MULTILINE)

    if not groups:
        # stdout is the data stream (get-phrasal-verbs.sh appends it to the
        # word list), so the diagnostic belongs on stderr. Stop here: there
        # is nothing to index below.
        print('Error to get the page.', file=sys.stderr)
        sys.exit(1)

    current = groups[len(groups) // 3 * 2][0].lower()
    print('Current', current, file=sys.stderr)

    for g in groups:
        g = g.lower()
        if not g.startswith(current):
            print('Filter: ', g, file=sys.stderr)
        else:
            print(g)
33 | 
34 | 
35 | if __name__ == '__main__':
36 |     main()
37 | 
38 | 


--------------------------------------------------------------------------------
/data_analysis/tools/process-scene-nouns.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | # File   : process-scene-nouns.py
 4 | # Author : Jiayuan Mao
 5 | # Email  : maojiayuan@gmail.com
 6 | # Date   : 08/23/2018
 7 | #
 8 | # This file is part of SceneGraphParser.
 9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 | 
12 | import sys
13 | import spacy
14 | 
15 | nlp = spacy.load('en')
16 | 
# Hand-picked scene nouns that are always added to the generated list.
extra_nouns = ['cabin', 'airport', 'terminal', 'arcade', 'park', 'apartment', 'gallery', 'school', 'studio', 'loft', 'field', 'factory', 'showroom', 'bank', 'banquet', 'court', 'salon', 'laboratory', 'station', 'store', 'lab', 'room', 'conference', 'dorm', 'lobby', 'entrance', 'restaurant', 'market', 'office', 'theater', 'skating', 'jail', 'kindergarden', 'dock', 'gym', 'cubicles', 'residential', 'mall', 'resort', 'hole', 'hostel']


def main():
    """Turn a category listing read from stdin into a sorted noun list.

    Each non-blank input line is expected to start with a slash-separated
    category path (for example ``/a/apartment_building/outdoor``) followed by
    whitespace and further fields. Every path component from the third one
    on becomes a noun, with underscores replaced by spaces. The nouns are
    merged with ``extra_nouns`` and printed to stdout one per line in sorted
    order.
    """
    nouns = set()
    for raw_line in sys.stdin:
        fields = raw_line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline) carry no category.
            continue
        # Only the path matters; anything after it is ignored.
        for component in fields[0].split('/')[2:]:
            nouns.add(component.replace('_', ' '))

    nouns.update(extra_nouns)

    for n in sorted(nouns):
        print(n)
33 | 
34 | 
35 | if __name__ == '__main__':
36 |     main()
37 | 
38 | 


--------------------------------------------------------------------------------
/data_analysis/untitled.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/untitled.txt


--------------------------------------------------------------------------------
/data_analysis/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/utils/__init__.py


--------------------------------------------------------------------------------
/data_analysis/utils/collections.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2017-present, Facebook, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 | 
16 | """A simple attribute dictionary used for representing configuration options."""
17 | 
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 | 
23 | 
class AttrDict(dict):
    """Dictionary whose entries can also be read and written as attributes.

    A per-instance flag, stored in the instance ``__dict__`` under the
    ``IMMUTABLE`` key, makes attribute assignment raise while it is set.
    """

    IMMUTABLE = '__immutable__'

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        # Written to __dict__ directly so the flag is not a dictionary entry
        # and does not go through __setattr__.
        self.__dict__[AttrDict.IMMUTABLE] = False

    def __getattr__(self, name):
        """Resolve ``name`` from the instance ``__dict__`` first, then from
        the dictionary entries; raise AttributeError if found in neither."""
        attrs = self.__dict__
        if name in attrs:
            return attrs[name]
        if name not in self:
            raise AttributeError(name)
        return self[name]

    def __setattr__(self, name, value):
        """Store ``value`` as a dictionary entry (or update an existing
        ``__dict__`` attribute); raise AttributeError when immutable."""
        if self.__dict__[AttrDict.IMMUTABLE]:
            raise AttributeError(
                'Attempted to set "{}" to "{}", but AttrDict is immutable'.
                format(name, value)
            )
        if name in self.__dict__:
            self.__dict__[name] = value
        else:
            self[name] = value

    def immutable(self, is_immutable):
        """Set the immutability flag to ``is_immutable`` on this object and,
        recursively, on every AttrDict held as an attribute or as a value.
        """
        self.__dict__[AttrDict.IMMUTABLE] = is_immutable
        # Attributes first, then dictionary values.
        children = list(self.__dict__.values()) + list(self.values())
        for child in children:
            if isinstance(child, AttrDict):
                child.immutable(is_immutable)

    def is_immutable(self):
        """Return the current value of the immutability flag."""
        return self.__dict__[AttrDict.IMMUTABLE]
67 | 


--------------------------------------------------------------------------------
/data_analysis/utils/detectron_weight_helper.py:
--------------------------------------------------------------------------------
 1 | """Helper functions for loading pretrained weights from Detectron pickle files
 2 | """
 3 | 
 4 | import pickle
 5 | import re
 6 | import torch
 7 | 
 8 | 
 9 | def load_detectron_weight(net, detectron_weight_file):
10 |     name_mapping, orphan_in_detectron = net.detectron_weight_mapping
11 | 
12 |     with open(detectron_weight_file, 'rb') as fp:
13 |         src_blobs = pickle.load(fp, encoding='latin1')
14 |     if 'blobs' in src_blobs:
15 |         src_blobs = src_blobs['blobs']
16 | 
17 |     params = net.state_dict()
18 |     for p_name, p_tensor in params.items():
19 |         d_name = name_mapping[p_name]
20 |         if isinstance(d_name, str):  # maybe str, None or True
21 |             v = src_blobs.get(d_name)
22 |             if v is not None:
23 |                 p_tensor.copy_(torch.Tensor(v))
24 | 
def resnet_weights_name_pattern():
    """Return a compiled regex matching ``conv1_w``, ``conv1_gn_s``/``_b``,
    ``res_conv1_*`` and ``res<N>_<M>_*`` names."""
    return re.compile(r"conv1_w|conv1_gn_[sb]|res_conv1_.+|res\d+_\d+_.+")
28 | 
29 | 
# Ad-hoc self-test: builds a Generalized_RCNN and checks that its
# detectron_weight_mapping covers exactly the keys of its state_dict.
# NOTE(review): relies on project modules reachable via '..' and on a config
# file at a relative path, so it presumably has to be run from this
# directory - confirm.
if __name__ == '__main__':
    """Testing"""
    from pprint import pprint
    import sys
    # Make the parent directory importable for the project imports below.
    sys.path.insert(0, '..')
    from modeling.model_builder import Generalized_RCNN
    from core.config import cfg, cfg_from_file

    cfg.MODEL.NUM_CLASSES = 81
    cfg_from_file('../../cfgs/res50_mask.yml')
    net = Generalized_RCNN()

    # pprint(list(net.state_dict().keys()), width=1)

    mapping, orphans = net.detectron_weight_mapping
    state_dict = net.state_dict()

    # Every mapped parameter name must exist in the model ...
    for k in mapping.keys():
        assert k in state_dict, '%s' % k

    # ... and every model parameter must have a mapping entry.
    rest = set(state_dict.keys()) - set(mapping.keys())
    assert len(rest) == 0
52 | 


--------------------------------------------------------------------------------
/data_analysis/utils/env.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2017-present, Facebook, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 | 
16 | """Environment helper functions."""
17 | 
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 | 
23 | import os
24 | import sys
25 | 
26 | # Default value of the CMake install prefix
27 | _CMAKE_INSTALL_PREFIX = '/usr/local'
28 | 
29 | 
def get_runtime_dir():
    """Return the runtime directory, i.e. the current working directory."""
    runtime_dir = os.getcwd()
    return runtime_dir
33 | 
34 | 
def get_py_bin_ext():
    """Return the file extension of python binaries (``'.py'``)."""
    extension = '.py'
    return extension
38 | 
39 | 
def set_up_matplotlib():
    """Select matplotlib's non-interactive 'Agg' backend."""
    import matplotlib as mpl
    mpl.use('Agg')
45 | 
46 | 
def exit_on_error():
    """Exit the current tool with status 1 to signal an error."""
    failure_status = 1
    sys.exit(failure_status)
50 | 


--------------------------------------------------------------------------------
/data_analysis/utils/image.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2017-present, Facebook, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 | 
16 | """Image helper functions."""
17 | 
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 | 
23 | import cv2
24 | import numpy as np
25 | 
26 | 
def aspect_ratio_rel(im, aspect_ratio):
    """Scale the width of ``im`` by ``aspect_ratio``, keeping its height.

    The new width is ``aspect_ratio * width`` rounded to the nearest integer.
    """
    height, width = im.shape[:2]
    new_width = int(round(aspect_ratio * width))
    return cv2.resize(im, dsize=(new_width, height))
33 | 
34 | 
def aspect_ratio_abs(im, aspect_ratio):
    """Resize ``im`` so that width / height equals ``aspect_ratio``.

    The target size is chosen so that width * height equals the original
    pixel count before both dimensions are truncated to integers.
    """
    height, width = im.shape[:2]
    area = height * width

    new_width = np.sqrt(area * aspect_ratio)
    new_height = np.sqrt(area / aspect_ratio)
    # Sanity check on the floating-point arithmetic above.
    assert np.isclose(new_width / new_height, aspect_ratio)

    target_size = (int(new_width), int(new_height))
    return cv2.resize(im, dsize=target_size)
46 | 


--------------------------------------------------------------------------------
/data_analysis/utils/timer.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from __future__ import division
 3 | from __future__ import print_function
 4 | from __future__ import unicode_literals
 5 | 
 6 | import time
 7 | 
 8 | 
 9 | class Timer(object):
10 |   """A simple timer."""
11 | 
12 |   def __init__(self):
13 |     self.reset()
14 | 
15 |   def tic(self):
16 |     # using time.time instead of time.clock because time time.clock
17 |     # does not normalize for multithreading
18 |     self.start_time = time.time()
19 | 
20 |   def toc(self, average=True):
21 |     self.diff = time.time() - self.start_time
22 |     self.total_time += self.diff
23 |     self.calls += 1
24 |     self.average_time = self.total_time / self.calls
25 |     if average:
26 |       return self.average_time
27 |     else:
28 |       return self.diff
29 | 
30 |   def reset(self):
31 |     self.total_time = 0.
32 |     self.calls = 0
33 |     self.start_time = 0.
34 |     self.diff = 0.
35 |     self.average_time = 0.
36 | 


--------------------------------------------------------------------------------
/demo/README.md:
--------------------------------------------------------------------------------
 1 | ## Webcam and Jupyter notebook demo
 2 | 
 3 | This folder contains a simple webcam demo that illustrates how you can use `maskrcnn_benchmark` for inference.
 4 | 
 5 | 
 6 | ### With your preferred environment
 7 | 
 8 | You can start it by running it from this folder, using one of the following commands:
 9 | ```bash
10 | # by default, it runs on the GPU
11 | # for best results, use min-image-size 800
12 | python webcam.py --min-image-size 800
13 | # can also run it on the CPU
14 | python webcam.py --min-image-size 300 MODEL.DEVICE cpu
15 | # or change the model that you want to use
16 | python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu
17 | # in order to see the probability heatmaps, pass --show-mask-heatmaps
18 | python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu
19 | ```
20 | 
21 | ### With Docker
22 | 
23 | Build the image with the tag `maskrcnn-benchmark` (check [INSTALL.md](../INSTALL.md) for instructions)
24 | 
25 | Adjust permissions of the X server host (be careful with this step, refer to 
26 | [here](http://wiki.ros.org/docker/Tutorials/GUI) for alternatives)
27 | 
28 | ```bash
29 | xhost +
30 | ``` 
31 | 
32 | Then run a container with the demo:
33 |  
34 | ```
35 | docker run --rm -it \
36 |     -e DISPLAY=${DISPLAY} \
37 |     --privileged \
38 |     -v /tmp/.X11-unix:/tmp/.X11-unix \
39 |     --device=/dev/video0:/dev/video0 \
40 |     --ipc=host maskrcnn-benchmark \
41 |     python demo/webcam.py --min-image-size 300
42 | ```
43 | 
44 | **DISCLAIMER:** *This was tested on an Ubuntu 16.04 machine;
45 | the volume mapping may vary depending on your platform*
46 | 


--------------------------------------------------------------------------------
/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png


--------------------------------------------------------------------------------
/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png


--------------------------------------------------------------------------------
/demo/webcam.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import argparse
 3 | import cv2
 4 | 
 5 | from maskrcnn_benchmark.config import cfg
 6 | from predictor import COCODemo
 7 | 
 8 | import time
 9 | 
10 | 
def main():
    """Run the COCO demo on live frames from the default webcam.

    Parses the command line, merges the chosen config file and any trailing
    ``opts`` overrides into ``cfg``, builds a ``COCODemo`` and then shows its
    composite output for every captured frame until Esc is pressed or the
    camera stops delivering frames.
    """
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.7,
        help="Minimum score for the prediction to be shown",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=224,
        help="Smallest size of the image to feed to the model. "
            "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    parser.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=args.confidence_threshold,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    cam = cv2.VideoCapture(0)
    try:
        while True:
            start_time = time.time()
            ret_val, img = cam.read()
            if not ret_val:
                # No frame was grabbed (camera missing, busy or unplugged);
                # img is not usable, so stop instead of feeding it to the model.
                print("Failed to read a frame from the camera; stopping.")
                break
            composite = coco_demo.run_on_opencv_image(img)
            print("Time: {:.2f} s / img".format(time.time() - start_time))
            cv2.imshow("COCO detections", composite)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
    finally:
        # Always hand the device back and close the window, even on errors.
        cam.release()
        cv2.destroyAllWindows()
77 | 
78 | 
79 | if __name__ == "__main__":
80 |     main()
81 | 


--------------------------------------------------------------------------------
/killpy.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Force-kill (SIGKILL) every process whose `ps -ef` line contains "python".
#
# The PID is read from the second whitespace-separated field instead of fixed
# character columns: a column slice can truncate or misread the PID when the
# PID column is wider than expected, which would signal the wrong process.
pids=$(ps -ef | grep python | grep -v grep | awk '{print $2}')

# Only call kill when something matched; it fails when given no PID at all.
if [ -n "$pids" ]; then
    # Unquoted on purpose so that each PID becomes its own argument.
    kill -9 $pids
fi
3 | 
4 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/config/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from .defaults import _C as cfg
 3 | 
 4 | 
 5 | def adjustment_for_relation(cfg):
 6 |     if cfg.MODEL.RELATION_ON:
 7 |         cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = \
 8 |             cfg.MODEL.RELATION.MAKE_PAIR_PROPOSAL_CNT * 2
 9 |         if cfg.MODEL.RELATION.USE_DETECTION_RESULT_FOR_RELATION:
10 |             cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG = \
11 |                 cfg.MODEL.RELATION.MAKE_PAIR_PROPOSAL_CNT
12 |     return cfg


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/ROIAlign.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | 
 4 | #include "cpu/vision.h"
 5 | 
 6 | #ifdef WITH_CUDA
 7 | #include "cuda/vision.h"
 8 | #endif
 9 | 
// Interface for Python
// Forward pass of ROIAlign; dispatches on the device of `input`:
//  - CUDA tensor: calls ROIAlign_forward_cuda when built WITH_CUDA,
//    otherwise raises "Not compiled with GPU support";
//  - any other tensor: calls ROIAlign_forward_cpu.
// All arguments are forwarded unchanged to the selected implementation.
at::Tensor ROIAlign_forward(const at::Tensor& input,
                            const at::Tensor& rois,
                            const float spatial_scale,
                            const int pooled_height,
                            const int pooled_width,
                            const int sampling_ratio) {
  if (input.type().is_cuda()) {
#ifdef WITH_CUDA
    return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  // Not a CUDA tensor: use the CPU implementation.
  return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
}
26 | 
// Backward pass of ROIAlign; dispatches on the device of `grad`:
//  - CUDA tensor: calls ROIAlign_backward_cuda when built WITH_CUDA,
//    otherwise raises "Not compiled with GPU support";
//  - any other tensor: raises "Not implemented on the CPU".
// All arguments are forwarded unchanged to the CUDA implementation.
at::Tensor ROIAlign_backward(const at::Tensor& grad,
                             const at::Tensor& rois,
                             const float spatial_scale,
                             const int pooled_height,
                             const int pooled_width,
                             const int batch_size,
                             const int channels,
                             const int height,
                             const int width,
                             const int sampling_ratio) {
  if (grad.type().is_cuda()) {
#ifdef WITH_CUDA
    return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  // There is no CPU backward implementation to fall back to.
  AT_ERROR("Not implemented on the CPU");
}
46 | 
47 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/ROIPool.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | 
 4 | #include "cpu/vision.h"
 5 | 
 6 | #ifdef WITH_CUDA
 7 | #include "cuda/vision.h"
 8 | #endif
 9 | 
10 | 
11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input,
12 |                                 const at::Tensor& rois,
13 |                                 const float spatial_scale,
14 |                                 const int pooled_height,
15 |                                 const int pooled_width) {
16 |   if (input.type().is_cuda()) {
17 | #ifdef WITH_CUDA
18 |     return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width);
19 | #else
20 |     AT_ERROR("Not compiled with GPU support");
21 | #endif
22 |   }
23 |   AT_ERROR("Not implemented on the CPU");
24 | }
25 | 
26 | at::Tensor ROIPool_backward(const at::Tensor& grad,
27 |                                  const at::Tensor& input,
28 |                                  const at::Tensor& rois,
29 |                                  const at::Tensor& argmax,
30 |                                  const float spatial_scale,
31 |                                  const int pooled_height,
32 |                                  const int pooled_width,
33 |                                  const int batch_size,
34 |                                  const int channels,
35 |                                  const int height,
36 |                                  const int width) {
37 |   if (grad.type().is_cuda()) {
38 | #ifdef WITH_CUDA
39 |     return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width);
40 | #else
41 |     AT_ERROR("Not compiled with GPU support");
42 | #endif
43 |   }
44 |   AT_ERROR("Not implemented on the CPU");
45 | }
46 | 
47 | 
48 | 
49 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "cpu/vision.h"
 4 | 
 5 | #ifdef WITH_CUDA
 6 | #include "cuda/vision.h"
 7 | #endif
 8 | 
 9 | // Interface for Python
10 | at::Tensor SigmoidFocalLoss_forward(
11 | 		const at::Tensor& logits,
12 |                 const at::Tensor& targets,
13 | 		const int num_classes, 
14 | 		const float gamma, 
15 | 		const float alpha) {
16 |   if (logits.type().is_cuda()) {
17 | #ifdef WITH_CUDA
18 |     return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha);
19 | #else
20 |     AT_ERROR("Not compiled with GPU support");
21 | #endif
22 |   }
23 |   AT_ERROR("Not implemented on the CPU");
24 | }
25 | 
26 | at::Tensor SigmoidFocalLoss_backward(
27 | 			     const at::Tensor& logits,
28 |                              const at::Tensor& targets,
29 | 			     const at::Tensor& d_losses,
30 | 			     const int num_classes,
31 | 			     const float gamma,
32 | 			     const float alpha) {
33 |   if (logits.type().is_cuda()) {
34 | #ifdef WITH_CUDA
35 |     return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha);
36 | #else
37 |     AT_ERROR("Not compiled with GPU support");
38 | #endif
39 |   }
40 |   AT_ERROR("Not implemented on the CPU");
41 | }
42 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include "cpu/vision.h"
 3 | 
 4 | 
 5 | template <typename scalar_t>
 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets,
 7 |                           const at::Tensor& scores,
 8 |                           const float threshold) {
 9 |   AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
10 |   AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
11 |   AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");
12 | 
13 |   if (dets.numel() == 0) {
14 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
15 |   }
16 | 
17 |   auto x1_t = dets.select(1, 0).contiguous();
18 |   auto y1_t = dets.select(1, 1).contiguous();
19 |   auto x2_t = dets.select(1, 2).contiguous();
20 |   auto y2_t = dets.select(1, 3).contiguous();
21 | 
22 |   at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
23 | 
24 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
25 | 
26 |   auto ndets = dets.size(0);
27 |   at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
28 | 
29 |   auto suppressed = suppressed_t.data<uint8_t>();
30 |   auto order = order_t.data<int64_t>();
31 |   auto x1 = x1_t.data<scalar_t>();
32 |   auto y1 = y1_t.data<scalar_t>();
33 |   auto x2 = x2_t.data<scalar_t>();
34 |   auto y2 = y2_t.data<scalar_t>();
35 |   auto areas = areas_t.data<scalar_t>();
36 | 
37 |   for (int64_t _i = 0; _i < ndets; _i++) {
38 |     auto i = order[_i];
39 |     if (suppressed[i] == 1)
40 |       continue;
41 |     auto ix1 = x1[i];
42 |     auto iy1 = y1[i];
43 |     auto ix2 = x2[i];
44 |     auto iy2 = y2[i];
45 |     auto iarea = areas[i];
46 | 
47 |     for (int64_t _j = _i + 1; _j < ndets; _j++) {
48 |       auto j = order[_j];
49 |       if (suppressed[j] == 1)
50 |         continue;
51 |       auto xx1 = std::max(ix1, x1[j]);
52 |       auto yy1 = std::max(iy1, y1[j]);
53 |       auto xx2 = std::min(ix2, x2[j]);
54 |       auto yy2 = std::min(iy2, y2[j]);
55 | 
56 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
57 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
58 |       auto inter = w * h;
59 |       auto ovr = inter / (iarea + areas[j] - inter);
60 |       if (ovr >= threshold)
61 |         suppressed[j] = 1;
62 |    }
63 |   }
64 |   return at::nonzero(suppressed_t == 0).squeeze(1);
65 | }
66 | 
67 | at::Tensor nms_cpu(const at::Tensor& dets,
68 |                const at::Tensor& scores,
69 |                const float threshold) {
70 |   at::Tensor result;
71 |   AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
72 |     result = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
73 |   });
74 |   return result;
75 | }
76 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/cpu/vision.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include <torch/extension.h>
 4 | 
 5 | 
// CPU implementation of the ROIAlign forward pass. Only declared here; it is
// the function the ROIAlign_forward dispatcher calls for non-CUDA inputs.
at::Tensor ROIAlign_forward_cpu(const at::Tensor& input,
                                const at::Tensor& rois,
                                const float spatial_scale,
                                const int pooled_height,
                                const int pooled_width,
                                const int sampling_ratio);


// CPU non-maximum suppression. Defined in cpu/nms_cpu.cpp, where it dispatches
// on the floating-point type of `dets`.
at::Tensor nms_cpu(const at::Tensor& dets,
                   const at::Tensor& scores,
                   const float threshold);
17 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/nms.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include "cpu/vision.h"
 4 | 
 5 | #ifdef WITH_CUDA
 6 | #include "cuda/vision.h"
 7 | #endif
 8 | 
 9 | 
10 | at::Tensor nms(const at::Tensor& dets,
11 |                const at::Tensor& scores,
12 |                const float threshold) {
13 | 
14 |   if (dets.type().is_cuda()) {
15 | #ifdef WITH_CUDA
16 |     // TODO raise error if not compiled with CUDA
17 |     if (dets.numel() == 0)
18 |       return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
19 |     auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
20 |     return nms_cuda(b, threshold);
21 | #else
22 |     AT_ERROR("Not compiled with GPU support");
23 | #endif
24 |   }
25 | 
26 |   at::Tensor result = nms_cpu(dets, scores, threshold);
27 |   return result;
28 | }
29 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/vision.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include "nms.h"
 3 | #include "ROIAlign.h"
 4 | #include "ROIPool.h"
 5 | #include "SigmoidFocalLoss.h"
 6 | 
// Python bindings for the extension module. Each m.def call binds a
// Python-visible name (first argument) to the C++ dispatcher defined in the
// headers included above, with the third argument as its docstring.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("nms", &nms, "non-maximum suppression");
  m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
  m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
  m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward");
  m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward");
  m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward");
  m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward");
}
16 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .build import make_data_loader
3 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/collate_batch.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from maskrcnn_benchmark.modeling import registry
 3 | from maskrcnn_benchmark.structures.image_list import to_image_list
 4 | 
 5 | 
 6 | @registry.BATCH_COLLATOR.register("DetectionOnlyCollator")
 7 | class BatchCollator(object):
 8 |     """
 9 |     From a list of samples from the dataset,
10 |     returns the batched images and targets.
11 |     This should be passed to the DataLoader
12 |     """
13 | 
14 |     def __init__(self, size_divisible=0):
15 |         self.size_divisible = size_divisible
16 | 
17 |     def __call__(self, batch):
18 |         transposed_batch = list(zip(*batch))
19 |         images = to_image_list(transposed_batch[0], self.size_divisible)
20 |         targets = transposed_batch[1]
21 |         img_ids = transposed_batch[2]
22 |         return images, targets, img_ids
23 | 
24 | 
@registry.BATCH_COLLATOR.register("RelationCollator")
class RelationBatchCollator:
    """
    Collate callable for samples laid out as
    (image, detection target, relation target, image id).
    """

    def __init__(self, size_divisible=0):
        # Forwarded as-is to ``to_image_list`` when a batch is collated.
        self.size_divisible = size_divisible

    def __call__(self, batch):
        """
        Regroup a list of samples field by field.

        Returns:
            tuple: ``(images, (det_targets, rel_targets), img_ids)``; the
            two kinds of targets are paired so the result keeps the
            three-slot layout of the detection-only collator.
        """
        columns = list(zip(*batch))
        batched_images = to_image_list(columns[0], self.size_divisible)
        paired_targets = (columns[1], columns[2])
        return batched_images, paired_targets, columns[3]
38 | 
39 | 
@registry.BATCH_COLLATOR.register("VGCollator")
class VGBatchCollator:
    """
    Collate callable that regroups samples field by field without batching
    the images: images stay a tuple of per-sample items, so
    ``size_divisible`` is stored but not used.
    """

    # Number of leading sample fields that are returned, in this order:
    # images, targets, img_id, phrase_ids, sent_id, sentence,
    # precompute_bbox, precompute_score, feature_map, vocab_label_elmo,
    # sent_sg, topN_box.

    def __init__(self, size_divisible=0):
        self.size_divisible = size_divisible

    def __call__(self, batch):
        """
        Transpose ``batch`` and return its first twelve fields.

        Returns:
            tuple: twelve tuples, one per field in the order listed above,
            each holding one entry per sample.
        """
        columns = list(zip(*batch))
        # Index one by one so a sample with too few fields still raises
        # IndexError instead of silently yielding a shorter result.
        return tuple(columns[position] for position in range(12))
63 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | # from .coco import COCODataset
 3 | # from .concat_dataset import ConcatDataset
 4 | # from .visual_genome import VGDataset
 5 | # from .voc import PascalVOCDataset
 6 | from .flickr import Flickr
 7 | 
 8 | # __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset", "VGDataset", "Flickr"]
 9 | __all__ = ["Flickr"]
10 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/concat_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import bisect
 3 | 
 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
 5 | 
 6 | 
 7 | class ConcatDataset(_ConcatDataset):
 8 |     """
 9 |     Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra
10 |     method for querying the sizes of the image
11 |     """
12 | 
13 |     def get_idxs(self, idx):
14 |         dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
15 |         if dataset_idx == 0:
16 |             sample_idx = idx
17 |         else:
18 |             sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
19 |         return dataset_idx, sample_idx
20 | 
21 |     def get_img_info(self, idx):
22 |         dataset_idx, sample_idx = self.get_idxs(idx)
23 |         return self.datasets[dataset_idx].get_img_info(sample_idx)
24 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/VG/__init__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from maskrcnn_benchmark.config import cfg
 4 | from .vg_eval import eval_detection, eval_relation
 5 | 
 6 | 
 7 | def vg_evaluation(
 8 |         dataset,
 9 |         predictions,
10 |         output_folder,
11 |         box_only,
12 |         iou_types,
13 |         expected_results,
14 |         expected_results_sigma_tol, ):
15 |     logger = logging.getLogger(__name__)
16 |     # split prediction
17 |     det_predictions = []
18 |     rel_predictions = []
19 |     for prop, res in predictions:
20 |         if cfg.MODEL.RELATION_ON:
21 |             det_predictions.append((prop, res[0]))
22 |             rel_predictions.append(res[1])
23 |         else:
24 |             det_predictions.append((prop, res))
25 |     proposal_eval_res = None
26 |     det_eval_results = None
27 |     coco_results = None
28 |     rel_eval_results = None
29 |     proposal_eval_res, det_eval_results, \
30 |     coco_results = eval_detection(dataset=dataset,
31 |                                   predictions=det_predictions,
32 |                                   box_only=box_only,
33 |                                   output_folder=output_folder,
34 |                                   iou_types=iou_types,
35 |                                   expected_results=expected_results,
36 |                                   expected_results_sigma_tol=expected_results_sigma_tol, )
37 |     if cfg.MODEL.RELATION_ON:
38 |         # relation evaluations
39 |         rel_eval_results = eval_relation(dataset=dataset,
40 |                                          predictions=rel_predictions,
41 |                                          output_folder=output_folder)
42 | 
43 |     logger.info("vg evaluation done")
44 |     return proposal_eval_res, det_eval_results, rel_eval_results, coco_results
45 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/__init__.py:
--------------------------------------------------------------------------------
 1 | from maskrcnn_benchmark.data import datasets
 2 | # from .VG import vg_evaluation
 3 | # from .coco import coco_evaluation
 4 | # from .voc import voc_evaluation
 5 | from .flickr import flick_evaluation
 6 | 
 7 | 
 8 | def evaluate(dataset, predictions, image_ids, curr_iter, output_folder, **kwargs):
 9 |     """evaluate dataset using different methods based on dataset type.
10 |     Args:
11 |         dataset: Dataset object
12 |         predictions(list[BoxList]): each item in the list represents the
13 |             prediction results for one image.
14 |         output_folder: output folder, to save evaluation files or results.
15 |         **kwargs: other args.
16 |     Returns:
17 |         evaluation result
18 |     """
19 |     args = dict(
20 |         dataset=dataset, predictions=predictions,image_ids=image_ids, curr_iter=curr_iter, output_folder=output_folder
21 |     )
22 |     # if isinstance(dataset, datasets.COCODataset):
23 |     #     return coco_evaluation(**args)
24 |     #
25 |     # elif isinstance(dataset, datasets.VGDataset):
26 |     #     return vg_evaluation(**args)
27 |     #
28 |     # elif isinstance(dataset, datasets.PascalVOCDataset):
29 |     #     return voc_evaluation(**args)
30 |     if isinstance(dataset, datasets.Flickr):
31 |         return flick_evaluation(**args)
32 | 
33 |     else:
34 |         dataset_name = dataset.__class__.__name__
35 |         raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name))
36 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py:
--------------------------------------------------------------------------------
 1 | from .coco_eval import do_coco_evaluation
 2 | 
 3 | 
 4 | def coco_evaluation(
 5 |         dataset,
 6 |         predictions,
 7 |         output_folder,
 8 |         box_only,
 9 |         iou_types,
10 |         expected_results,
11 |         expected_results_sigma_tol,
12 | ):
13 |     return do_coco_evaluation(
14 |         dataset=dataset,
15 |         predictions=predictions,
16 |         box_only=box_only,
17 |         output_folder=output_folder,
18 |         iou_types=iou_types,
19 |         expected_results=expected_results,
20 |         expected_results_sigma_tol=expected_results_sigma_tol,
21 |     )
22 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/flickr/__init__.py:
--------------------------------------------------------------------------------
1 | from .flickr_eval import eval_recall
2 | 
3 | 
def flick_evaluation(dataset, predictions, image_ids, curr_iter, output_folder):
    """Evaluate Flickr predictions by delegating to ``eval_recall``.

    All five arguments are passed through positionally, in the same order.
    """
    recall_args = (dataset, predictions, image_ids, curr_iter, output_folder)
    return eval_recall(*recall_args)


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from .voc_eval import do_voc_evaluation
 4 | 
 5 | 
 6 | def voc_evaluation(dataset, predictions, output_folder, box_only, **_):
 7 |     logger = logging.getLogger("maskrcnn_benchmark.inference")
 8 |     if box_only:
 9 |         logger.warning("voc evaluation doesn't support box_only, ignored.")
10 |     logger.info("performing voc evaluation, ignored iou_types.")
11 |     return do_voc_evaluation(
12 |         dataset=dataset,
13 |         predictions=predictions,
14 |         output_folder=output_folder,
15 |         logger=logger,
16 |     )
17 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/list_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | """
 3 | Simple dataset class that wraps a list of path names
 4 | """
 5 | 
 6 | from PIL import Image
 7 | 
 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList
 9 | 
10 | 
class ListDataset(object):
    """
    Dataset over a list of image paths.

    Args:
        image_lists: indexable collection of image paths.
        transforms: optional callable taking and returning ``(img, target)``.
    """

    def __init__(self, image_lists, transforms=None):
        self.image_lists = image_lists
        self.transforms = transforms

    def __getitem__(self, item):
        """
        Load image ``item`` as RGB and pair it with a dummy target.

        The dummy target is a single box covering the whole image.
        """
        # Open in a ``with`` block so the file handle is released right away
        # instead of whenever the image object gets garbage collected.
        with Image.open(self.image_lists[item]) as raw:
            img = raw.convert("RGB")

        # dummy target
        w, h = img.size
        target = BoxList([[0, 0, w, h]], img.size, mode="xyxy")

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.image_lists)

    def get_img_info(self, item):
        """
        Return the image dimensions for the image, without
        loading and pre-processing it

        Returns:
            dict: ``{"height": ..., "width": ...}`` in pixels.
            NOTE(review): the dict layout follows what other datasets'
            ``get_img_info`` are assumed to return -- confirm against the
            callers (e.g. aspect-ratio grouping).
        """
        # ``img.size`` is read without calling ``load()`` or ``convert()``,
        # so the pixel data is not decoded here.
        with Image.open(self.image_lists[item]) as img:
            width, height = img.size
        return {"height": height, "width": width}
37 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .distributed import DistributedSampler
3 | from .grouped_batch_sampler import GroupedBatchSampler
4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler
5 | 
6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"]
7 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from torch.utils.data.sampler import BatchSampler
 3 | 
 4 | 
 5 | class IterationBasedBatchSampler(BatchSampler):
 6 |     """
 7 |     Wraps a BatchSampler, resampling from it until
 8 |     a specified number of iterations have been sampled
 9 |     """
10 | 
11 |     def __init__(self, batch_sampler, num_iterations, start_iter=0):
12 |         self.batch_sampler = batch_sampler
13 |         self.num_iterations = num_iterations
14 |         self.start_iter = start_iter
15 | 
16 |     def __iter__(self):
17 |         iteration = self.start_iter
18 |         while iteration <= self.num_iterations:
19 |             # if the underlying sampler has a set_epoch method, like
20 |             # DistributedSampler, used for making each process see
21 |             # a different split of the dataset, then set it
22 |             if hasattr(self.batch_sampler.sampler, "set_epoch"):
23 |                 self.batch_sampler.sampler.set_epoch(iteration)
24 |             for batch in self.batch_sampler:
25 |                 iteration += 1
26 |                 if iteration > self.num_iterations:
27 |                     break
28 |                 yield batch
29 | 
30 |     def __len__(self):
31 |         return self.num_iterations
32 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .build import build_transforms
3 | from .transforms import Compose
4 | from .transforms import Normalize
5 | from .transforms import RandomHorizontalFlip
6 | from .transforms import Resize
7 | from .transforms import ToTensor
8 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/transforms/build.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from . import transforms_vg_bp as TBP
 3 | 
 4 | 
 5 | def build_transforms(cfg, is_train=True):
 6 |     if is_train:
 7 |         min_size = cfg.INPUT.MIN_SIZE_TRAIN
 8 |         max_size = cfg.INPUT.MAX_SIZE_TRAIN
 9 |         # flip_prob = 0.5  # cfg.INPUT.FLIP_PROB_TRAIN
10 |         flip_prob = 0
11 |     else:
12 |         min_size = cfg.INPUT.MIN_SIZE_TEST
13 |         max_size = cfg.INPUT.MAX_SIZE_TEST
14 |         flip_prob = 0
15 | 
16 |     to_bgr255 = cfg.INPUT.TO_BGR255
17 |     normalize_transform = TBP.Normalize(
18 |         mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255
19 |     )
20 | 
21 |     To255 = TBP.To255(mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255)
22 | 
23 |     transform = TBP.Compose(
24 |         [
25 |             TBP.ResizeAndNormalize(min_size, max_size, cfg.INPUT.PIXEL_MEAN, cfg.INPUT.PIXEL_STD),
26 |             TBP.ToTensor(),
27 |         ]
28 |     )
29 |     return transform
30 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | 
 4 | from .batch_norm import FrozenBatchNorm2d
 5 | from .misc import Conv2d
 6 | from .misc import ConvTranspose2d
 7 | from .misc import BatchNorm2d
 8 | from .misc import interpolate
 9 | from .nms import nms
10 | from .roi_align import ROIAlign
11 | from .roi_align import roi_align
12 | from .roi_pool import ROIPool
13 | from .roi_pool import roi_pool
14 | from .smooth_l1_loss import smooth_l1_loss
15 | from .sigmoid_focal_loss import SigmoidFocalLoss
16 | 
17 | __all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool",
18 |            "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate",
19 |            "BatchNorm2d", "FrozenBatchNorm2d", "SigmoidFocalLoss"
20 |           ]
21 | 
22 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import glob
 3 | import os.path
 4 | 
 5 | import torch
 6 | 
 7 | try:
 8 |     from torch.utils.cpp_extension import load as load_ext
 9 |     from torch.utils.cpp_extension import CUDA_HOME
10 | except ImportError:
11 |     raise ImportError("The cpp layer extensions requires PyTorch 0.4 or higher")
12 | 
13 | 
def _load_C_extensions():
    """
    Build and load the C++ extension from the sibling ``csrc`` directory.

    The ``*.cpp`` files directly in ``csrc`` and in ``csrc/cpu`` are always
    compiled. The ``csrc/cuda/*.cu`` files are added, together with the
    ``-DWITH_CUDA`` flag, only when CUDA is available at runtime and
    ``CUDA_HOME`` is set.

    Returns:
        The module returned by ``torch.utils.cpp_extension.load``.
    """
    package_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    csrc_dir = os.path.join(package_dir, "csrc")

    def _find(*pattern_parts):
        # Glob relative to the csrc directory.
        return glob.glob(os.path.join(csrc_dir, *pattern_parts))

    sources = _find("*.cpp") + _find("cpu", "*.cpp")
    cuda_sources = _find("cuda", "*.cu")

    with_cuda = torch.cuda.is_available() and CUDA_HOME is not None
    compile_flags = []
    if with_cuda:
        sources.extend(cuda_sources)
        compile_flags = ["-DWITH_CUDA"]

    sources = [os.path.join(csrc_dir, path) for path in sources]
    return load_ext(
        "torchvision",
        sources,
        extra_cflags=compile_flags,
        extra_include_paths=[csrc_dir],
    )


# Built (or fetched from the extension cache) when this module is imported.
_C = _load_C_extensions()
40 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/batch_norm.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | from torch import nn
 4 | 
 5 | 
 6 | class FrozenBatchNorm2d(nn.Module):
 7 |     """
 8 |     BatchNorm2d where the batch statistics and the affine parameters
 9 |     are fixed
10 |     """
11 | 
12 |     def __init__(self, n):
13 |         super(FrozenBatchNorm2d, self).__init__()
14 |         self.register_buffer("weight", torch.ones(n))
15 |         self.register_buffer("bias", torch.zeros(n))
16 |         self.register_buffer("running_mean", torch.zeros(n))
17 |         self.register_buffer("running_var", torch.ones(n))
18 | 
19 |     def forward(self, x):
20 |         scale = self.weight * self.running_var.rsqrt()
21 |         bias = self.bias - self.running_mean * scale
22 |         scale = scale.reshape(1, -1, 1, 1)
23 |         bias = bias.reshape(1, -1, 1, 1)
24 |         return x * scale + bias
25 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/nms.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | # from ._utils import _C
3 | from maskrcnn_benchmark import _C
4 | 
5 | nms = _C.nms
6 | # nms.__doc__ = """
7 | # This function performs Non-maximum suppression"""
8 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/roi_align.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | from torch import nn
 4 | from torch.autograd import Function
 5 | from torch.autograd.function import once_differentiable
 6 | from torch.nn.modules.utils import _pair
 7 | 
 8 | from maskrcnn_benchmark import _C
 9 | 
10 | 
class _ROIAlign(Function):
    """Autograd function wrapping the ``roi_align`` forward/backward ops of
    the compiled extension."""

    @staticmethod
    def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio):
        """
        Run ``_C.roi_align_forward`` and stash what ``backward`` needs.

        ``output_size`` may be a single int or a (height, width) pair; it is
        normalised with ``_pair`` and the normalised value is what gets
        passed to the extension.
        """
        ctx.save_for_backward(roi)
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = spatial_scale
        ctx.sampling_ratio = sampling_ratio
        ctx.input_shape = input.size()
        # Index the normalised pair, not the raw argument: a plain int
        # ``output_size`` is not subscriptable.
        output = _C.roi_align_forward(
            input,
            roi,
            spatial_scale,
            ctx.output_size[0],
            ctx.output_size[1],
            sampling_ratio,
        )
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """
        Run ``_C.roi_align_backward`` with the values saved by ``forward``.

        Only ``input`` receives a gradient; the other four forward
        arguments get None.
        """
        rois, = ctx.saved_tensors
        output_size = ctx.output_size
        spatial_scale = ctx.spatial_scale
        sampling_ratio = ctx.sampling_ratio
        bs, ch, h, w = ctx.input_shape
        grad_input = _C.roi_align_backward(
            grad_output,
            rois,
            spatial_scale,
            output_size[0],
            output_size[1],
            bs,
            ch,
            h,
            w,
            sampling_ratio,
        )
        return grad_input, None, None, None, None
45 | 
46 | 
# Functional entry point: calling this runs _ROIAlign through autograd.
roi_align = _ROIAlign.apply
48 | 
49 | 
class ROIAlign(nn.Module):
    """Module form of ``roi_align`` that remembers its pooling settings."""

    def __init__(self, output_size, spatial_scale, sampling_ratio):
        """Store the three settings that are passed to ``roi_align`` on each call."""
        super(ROIAlign, self).__init__()
        self.output_size = output_size
        self.spatial_scale = spatial_scale
        self.sampling_ratio = sampling_ratio

    def forward(self, input, rois):
        """Apply ``roi_align`` to ``input`` and ``rois`` with the stored settings."""
        pooled = roi_align(
            input, rois, self.output_size, self.spatial_scale, self.sampling_ratio
        )
        return pooled

    def __repr__(self):
        """Return ``ClassName(output_size=..., spatial_scale=..., sampling_ratio=...)``."""
        fields = [
            "output_size=" + str(self.output_size),
            "spatial_scale=" + str(self.spatial_scale),
            "sampling_ratio=" + str(self.sampling_ratio),
        ]
        return self.__class__.__name__ + "(" + ", ".join(fields) + ")"
69 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/roi_pool.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | from torch import nn
 4 | from torch.autograd import Function
 5 | from torch.autograd.function import once_differentiable
 6 | from torch.nn.modules.utils import _pair
 7 | 
 8 | from maskrcnn_benchmark import _C
 9 | 
10 | 
class _ROIPool(Function):
    """Autograd wrapper around the compiled ``_C.roi_pool_*`` kernels.

    ``forward`` calls ``_C.roi_pool_forward``, which returns the pooled
    output together with an ``argmax`` tensor; both ``argmax`` and the
    inputs are saved so ``backward`` can call ``_C.roi_pool_backward``.
    Only ``input`` receives a gradient.
    """

    @staticmethod
    def forward(ctx, input, roi, output_size, spatial_scale):
        """Run the forward kernel.

        Arguments:
            input: tensor whose ``size()`` is later unpacked as four values
                in ``backward``.
            roi: regions of interest, forwarded unchanged to the kernel.
            output_size: an int or a two-element sequence; normalised with
                ``_pair`` so both forms are accepted.
            spatial_scale: forwarded unchanged to the kernel.
        """
        # Normalise first: indexing the raw argument would fail for an int
        # even though ctx stores the paired form for backward().
        output_size = _pair(output_size)
        ctx.output_size = output_size
        ctx.spatial_scale = spatial_scale
        ctx.input_shape = input.size()
        output, argmax = _C.roi_pool_forward(
            input, roi, spatial_scale, output_size[0], output_size[1]
        )
        ctx.save_for_backward(input, roi, argmax)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input`` followed by three ``None``s."""
        input, rois, argmax = ctx.saved_tensors
        output_size = ctx.output_size
        spatial_scale = ctx.spatial_scale
        bs, ch, h, w = ctx.input_shape
        grad_input = _C.roi_pool_backward(
            grad_output,
            input,
            rois,
            argmax,
            spatial_scale,
            output_size[0],
            output_size[1],
            bs,
            ch,
            h,
            w,
        )
        # One entry per forward() argument (input, roi, output_size,
        # spatial_scale).
        return grad_input, None, None, None
44 | 
45 | 
# Functional entry point: calling this runs _ROIPool through autograd.
roi_pool = _ROIPool.apply
47 | 
48 | 
class ROIPool(nn.Module):
    """Module form of ``roi_pool`` that remembers its pooling settings."""

    def __init__(self, output_size, spatial_scale):
        """Store the two settings that are passed to ``roi_pool`` on each call."""
        super(ROIPool, self).__init__()
        self.output_size = output_size
        self.spatial_scale = spatial_scale

    def forward(self, input, rois):
        """Apply ``roi_pool`` to ``input`` and ``rois`` with the stored settings."""
        pooled = roi_pool(input, rois, self.output_size, self.spatial_scale)
        return pooled

    def __repr__(self):
        """Return ``ClassName(output_size=..., spatial_scale=...)``."""
        fields = [
            "output_size=" + str(self.output_size),
            "spatial_scale=" + str(self.spatial_scale),
        ]
        return self.__class__.__name__ + "(" + ", ".join(fields) + ")"
64 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | from torch.autograd import Function
 4 | from torch.autograd.function import once_differentiable
 5 | 
 6 | from maskrcnn_benchmark import _C
 7 | 
 8 | # TODO: Use JIT to replace CUDA implementation in the future.
 9 | class _SigmoidFocalLoss(Function):
10 |     @staticmethod
11 |     def forward(ctx, logits, targets, gamma, alpha):
12 |         ctx.save_for_backward(logits, targets)
13 |         num_classes = logits.shape[1]
14 |         ctx.num_classes = num_classes
15 |         ctx.gamma = gamma
16 |         ctx.alpha = alpha
17 | 
18 |         losses = _C.sigmoid_focalloss_forward(
19 |             logits, targets, num_classes, gamma, alpha
20 |         )
21 |         return losses
22 | 
23 |     @staticmethod
24 |     @once_differentiable
25 |     def backward(ctx, d_loss):
26 |         logits, targets = ctx.saved_tensors
27 |         num_classes = ctx.num_classes
28 |         gamma = ctx.gamma
29 |         alpha = ctx.alpha
30 |         d_loss = d_loss.contiguous()
31 |         d_logits = _C.sigmoid_focalloss_backward(
32 |             logits, targets, d_loss, num_classes, gamma, alpha
33 |         )
34 |         return d_logits, None, None, None, None
35 | 
36 | 
# Functional entry point: calling this runs _SigmoidFocalLoss through autograd.
sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply
38 | 
39 | 
def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha):
    """Pure-PyTorch sigmoid focal loss, returned per element (not reduced).

    Arguments:
        logits: tensor indexed as ``[sample, class]``; ``logits.shape[1]`` is
            the number of classes.
        targets: 1-D tensor of labels.  Label ``k`` (``1 <= k <= num_classes``)
            marks column ``k - 1`` as the positive class; any other
            non-negative label makes every column a negative; negative labels
            contribute zero loss.
        gamma: focusing exponent, either a plain number or an indexable
            object whose first element is used.
        alpha: positive/negative balancing weight, same accepted forms as
            ``gamma``.

    Returns:
        Tensor with the same shape as ``logits`` holding the per-element loss.
    """
    # The original unconditionally did gamma[0] / alpha[0], which raises
    # TypeError for plain Python numbers; accept both forms.
    if not isinstance(gamma, (int, float)):
        gamma = gamma[0]
    if not isinstance(alpha, (int, float)):
        alpha = alpha[0]

    num_classes = logits.shape[1]
    # Row vector 1..num_classes, compared against the label column below.
    class_range = torch.arange(
        1, num_classes + 1, dtype=targets.dtype, device=targets.device
    ).unsqueeze(0)

    t = targets.unsqueeze(1)
    p = torch.sigmoid(logits)
    term1 = (1 - p) ** gamma * torch.log(p)
    term2 = p ** gamma * torch.log(1 - p)

    is_positive = (t == class_range).float()
    # Negatives exclude rows with a negative label.
    is_negative = ((t != class_range) * (t >= 0)).float()
    return -is_positive * term1 * alpha - is_negative * term2 * (1 - alpha)
53 | 
54 | 
class SigmoidFocalLoss(nn.Module):
    """Sigmoid focal loss module that dispatches on the device of ``logits``."""

    def __init__(self, gamma, alpha):
        """Store ``gamma`` and ``alpha``; both are passed unchanged to the loss function."""
        super(SigmoidFocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, logits, targets):
        """Return the summed focal loss for ``logits`` against ``targets``.

        CUDA tensors go through ``sigmoid_focal_loss_cuda``; everything else
        goes through ``sigmoid_focal_loss_cpu``.
        """
        # The unused local `device` from the original was dropped.
        if logits.is_cuda:
            loss_func = sigmoid_focal_loss_cuda
        else:
            loss_func = sigmoid_focal_loss_cpu

        loss = loss_func(logits, targets, self.gamma, self.alpha)
        return loss.sum()

    def __repr__(self):
        """Return ``ClassName(gamma=..., alpha=...)``."""
        tmpstr = self.__class__.__name__ + "("
        tmpstr += "gamma=" + str(self.gamma)
        tmpstr += ", alpha=" + str(self.alpha)
        tmpstr += ")"
        return tmpstr
77 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/smooth_l1_loss.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | 
 4 | 
 5 | # TODO maybe push this to nn?
 6 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
 7 |     """
 8 |     very similar to the smooth_l1_loss from pytorch, but with
 9 |     the extra beta parameter
10 |     """
11 |     n = torch.abs(input - target)
12 |     cond = n < beta
13 |     loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
14 |     if size_average:
15 |         return loss.mean()
16 |     return loss.sum()
17 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/spatial_coordinate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3.6
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2019-07-09 13:40
 4 | # @Author  : Yongfei Liu
 5 | # @Email   : liuyf3@shanghaitech.edu.cn
 6 | 
 7 | import numpy as np
 8 | import torch
 9 | 
10 | 
11 | 
def meshgrid_generation(feat):
    """Build a normalised (row, column) coordinate grid matching ``feat``.

    Arguments:
        feat: 4-D tensor; only its last two dimensions ``(h, w)`` and its
            device are used.

    Returns:
        Float tensor of shape ``(1, 2, h, w)`` on the same device as
        ``feat``.  Channel 0 holds ``row / (h / 2) - 1`` and channel 1 holds
        ``col / (w / 2) - 1``, so values start at -1 and stay below 1.
    """
    _, _, h, w = feat.shape

    # feat.device works for CPU and CUDA alike; the original used
    # feat.get_device(), which yields -1 for CPU tensors and is not a valid
    # target for .to().
    device = feat.device

    rows = torch.arange(h, dtype=torch.float32, device=device) / (h / 2) - 1
    cols = torch.arange(w, dtype=torch.float32, device=device) / (w / 2) - 1

    # Broadcast the 1-D coordinate vectors to full (h, w) grids.
    grid_h = rows.view(h, 1).expand(h, w)
    grid_w = cols.view(1, w).expand(h, w)

    spatial_coord = torch.stack((grid_h, grid_w), 0).unsqueeze(0)
    return spatial_coord
29 | 
30 | 
def get_spatial_feat(precomp_boxes):
    """Return a 5-D spatial descriptor for every box.

    Arguments:
        precomp_boxes: object exposing ``bbox`` (an ``(N, 4)`` tensor whose
            columns are used as ``x1, y1, x2, y2``) and ``size`` (indexable,
            ``size[0]`` is the width and ``size[1]`` the height).

    Returns:
        ``(N, 5)`` tensor: the four box coordinates divided by
        ``[width, height, width, height]`` followed by the box area divided
        by ``width * height``.
    """
    bbox = precomp_boxes.bbox
    img_w, img_h = precomp_boxes.size[0], precomp_boxes.size[1]  # width, height

    bbox_area = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1])
    # Normalise by width * height.  The original divided by
    # `bbox_size[0] * bbox_area[1]` (the area of the second box), which is a
    # typo for `bbox_size[1]` and also fails when only one box is present.
    # NOTE(review): this changes the value of the fifth feature; weights
    # trained against the old value may need re-checking.
    bbox_area_ratio = (bbox_area / (img_w * img_h)).unsqueeze(1)

    # bbox.device is valid on CPU too, unlike get_device() which returns -1.
    scale = torch.tensor(
        [img_w, img_h, img_w, img_h], dtype=torch.float32, device=bbox.device
    )
    vis_spatial = torch.cat((bbox / scale, bbox_area_ratio), 1)
    return vis_spatial
44 | 
if __name__ == '__main__':

    # Manual smoke run: build the coordinate grid for a dummy 50x50 feature map.
    feat = torch.ones(3,1,50,50)
    meshgrid_generation(feat=feat)
49 | 
50 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .backbone import build_backbone
3 | from . import fbnet
4 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/backbone/vgg16.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torchvision.models as models
 3 | 
 4 | 
 5 | class VGG16(nn.Module):
 6 |     def __init__(self, cfg):
 7 |         super(VGG16, self).__init__()
 8 |         self.features = models.vgg16_bn(pretrained=True).features
 9 |         self.out_channels = 512
10 |         cnt = 0
11 |         for each in self.features:
12 |             if cnt >= cfg.MODEL.BACKBONE.FREEZE_CONV_BODY_AT:
13 |                 break
14 |             cnt += 1
15 |             set_trainable(each, requires_grad=False)
16 | 
17 |     def forward(self, im_data):
18 |         x = self.features(im_data)
19 |         return [x]  # for the following process
20 | 
21 | 
def set_trainable(model, requires_grad):
    """Set ``requires_grad`` on every parameter yielded by ``model.parameters()``."""
    model_params = model.parameters()
    set_trainable_param(model_params, requires_grad)
24 | 
25 | 
def set_trainable_param(parameters, requires_grad):
    """Assign ``requires_grad`` to each item of the ``parameters`` iterable."""
    for tensor in parameters:
        tensor.requires_grad = requires_grad
29 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/detector/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .detectors import build_detection_model
3 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/detector/detectors.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from .generalized_rcnn import GeneralizedRCNN
 3 | from .generalized_rcnn_det import GeneralizedRCNNDet
 4 | 
 5 | 
# Maps the cfg.MODEL.META_ARCHITECTURE string to the detector class to instantiate.
_DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN, "GeneralizedRCNNDet": GeneralizedRCNNDet}
 7 | 
 8 | 
 9 | def build_detection_model(cfg):
10 |     meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE]
11 |     return meta_arch(cfg)
12 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | from maskrcnn_benchmark.utils.registry import Registry
 4 | 
# Module-level registries, one per pluggable component family.
# Implementations are looked up by subscripting a registry with a name.
BACKBONES = Registry()
RPN_HEADS = Registry()
ROI_BOX_FEATURE_EXTRACTORS = Registry()
ROI_BOX_PREDICTOR = Registry()
ROI_KEYPOINT_FEATURE_EXTRACTORS = Registry()
ROI_KEYPOINT_PREDICTOR = Registry()
ROI_MASK_FEATURE_EXTRACTORS = Registry()
ROI_MASK_PREDICTOR = Registry()

# NOTE(review): presumably holds batch collation callables; confirm against its users.
BATCH_COLLATOR = Registry()


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/relation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/relation/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/roi_heads/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from .inference import make_roi_keypoint_post_processor
 4 | from .loss import make_roi_keypoint_loss_evaluator
 5 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor
 6 | from .roi_keypoint_predictors import make_roi_keypoint_predictor
 7 | 
 8 | 
 9 | class ROIKeypointHead(torch.nn.Module):
10 |     def __init__(self, cfg, in_channels):
11 |         super(ROIKeypointHead, self).__init__()
12 |         self.cfg = cfg.clone()
13 |         self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels)
14 |         self.predictor = make_roi_keypoint_predictor(
15 |             cfg, self.feature_extractor.out_channels)
16 |         self.post_processor = make_roi_keypoint_post_processor(cfg)
17 |         self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg)
18 | 
19 |     def forward(self, features, proposals, targets=None):
20 |         """
21 |         Arguments:
22 |             features (list[Tensor]): feature-maps from possibly several levels
23 |             proposals (list[BoxList]): proposal boxes
24 |             targets (list[BoxList], optional): the ground-truth targets.
25 | 
26 |         Returns:
27 |             x (Tensor): the result of the feature extractor
28 |             proposals (list[BoxList]): during training, the original proposals
29 |                 are returned. During testing, the predicted boxlists are returned
30 |                 with the `mask` field set
31 |             losses (dict[Tensor]): During training, returns the losses for the
32 |                 head. During testing, returns an empty dict.
33 |         """
34 |         if self.training:
35 |             with torch.no_grad():
36 |                 proposals = self.loss_evaluator.subsample(proposals, targets)
37 | 
38 |         x = self.feature_extractor(features, proposals)
39 |         kp_logits = self.predictor(x)
40 | 
41 |         if not self.training:
42 |             result = self.post_processor(kp_logits, proposals)
43 |             return x, result, {}
44 | 
45 |         loss_kp = self.loss_evaluator(proposals, kp_logits)
46 | 
47 |         return x, proposals, dict(loss_kp=loss_kp)
48 | 
49 | 
def build_roi_keypoint_head(cfg, in_channels):
    """Factory returning a ``ROIKeypointHead`` built from ``cfg`` and ``in_channels``."""
    head = ROIKeypointHead(cfg, in_channels)
    return head
52 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py:
--------------------------------------------------------------------------------
 1 | from torch import nn
 2 | from torch.nn import functional as F
 3 | 
 4 | from maskrcnn_benchmark.layers import Conv2d
 5 | from maskrcnn_benchmark.modeling import registry
 6 | from maskrcnn_benchmark.modeling.poolers import Pooler
 7 | 
 8 | 
 9 | @registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor")
10 | class KeypointRCNNFeatureExtractor(nn.Module):
11 |     def __init__(self, cfg, in_channels):
12 |         super(KeypointRCNNFeatureExtractor, self).__init__()
13 | 
14 |         resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION
15 |         scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES
16 |         sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO
17 |         pooler = Pooler(
18 |             output_size=(resolution, resolution),
19 |             scales=scales,
20 |             sampling_ratio=sampling_ratio,
21 |         )
22 |         self.pooler = pooler
23 | 
24 |         input_features = in_channels
25 |         layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS
26 |         next_feature = input_features
27 |         self.blocks = []
28 |         for layer_idx, layer_features in enumerate(layers, 1):
29 |             layer_name = "conv_fcn{}".format(layer_idx)
30 |             module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1)
31 |             nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
32 |             nn.init.constant_(module.bias, 0)
33 |             self.add_module(layer_name, module)
34 |             next_feature = layer_features
35 |             self.blocks.append(layer_name)
36 |         self.out_channels = layer_features
37 | 
38 |     def forward(self, x, proposals):
39 |         x = self.pooler(x, proposals)
40 |         for layer_name in self.blocks:
41 |             x = F.relu(getattr(self, layer_name)(x))
42 |         return x
43 | 
44 | 
def make_roi_keypoint_feature_extractor(cfg, in_channels):
    """Build the extractor registered under ``cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR``."""
    extractors = registry.ROI_KEYPOINT_FEATURE_EXTRACTORS
    extractor_name = cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR
    return extractors[extractor_name](cfg, in_channels)
50 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py:
--------------------------------------------------------------------------------
 1 | from torch import nn
 2 | 
 3 | from maskrcnn_benchmark import layers
 4 | from maskrcnn_benchmark.modeling import registry
 5 | 
 6 | 
 7 | @registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor")
 8 | class KeypointRCNNPredictor(nn.Module):
 9 |     def __init__(self, cfg, in_channels):
10 |         super(KeypointRCNNPredictor, self).__init__()
11 |         input_features = in_channels
12 |         num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES
13 |         deconv_kernel = 4
14 |         self.kps_score_lowres = layers.ConvTranspose2d(
15 |             input_features,
16 |             num_keypoints,
17 |             deconv_kernel,
18 |             stride=2,
19 |             padding=deconv_kernel // 2 - 1,
20 |         )
21 |         nn.init.kaiming_normal_(
22 |             self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu"
23 |         )
24 |         nn.init.constant_(self.kps_score_lowres.bias, 0)
25 |         self.up_scale = 2
26 |         self.out_channels = num_keypoints
27 | 
28 |     def forward(self, x):
29 |         x = self.kps_score_lowres(x)
30 |         x = layers.interpolate(
31 |             x, scale_factor=self.up_scale, mode="bilinear", align_corners=False
32 |         )
33 |         return x
34 | 
35 | 
def make_roi_keypoint_predictor(cfg, in_channels):
    """Build the predictor registered under ``cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR``."""
    predictors = registry.ROI_KEYPOINT_PREDICTOR
    predictor_name = cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR
    return predictors[predictor_name](cfg, in_channels)
39 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from torch import nn
 3 | from torch.nn import functional as F
 4 | 
 5 | from maskrcnn_benchmark.layers import Conv2d
 6 | from maskrcnn_benchmark.layers import ConvTranspose2d
 7 | from maskrcnn_benchmark.modeling import registry
 8 | 
 9 | 
@registry.ROI_MASK_PREDICTOR.register("MaskRCNNC4Predictor")
class MaskRCNNC4Predictor(nn.Module):
    """Mask predictor: 2x2 stride-2 transposed conv, ReLU, then a 1x1 conv.

    The 1x1 conv outputs ``cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES`` channels.
    """

    def __init__(self, cfg, in_channels):
        super(MaskRCNNC4Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        self.conv5_mask = ConvTranspose2d(in_channels, dim_reduced, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                # Caffe2's MSRAFill corresponds to kaiming_normal_ in PyTorch.
                nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")

    def forward(self, x):
        """Return mask logits for ``x``."""
        upsampled = F.relu(self.conv5_mask(x))
        return self.mask_fcn_logits(upsampled)
32 | 
33 | 
@registry.ROI_MASK_PREDICTOR.register("MaskRCNNConv1x1Predictor")
class MaskRCNNConv1x1Predictor(nn.Module):
    """Mask predictor made of a single 1x1 convolution.

    The conv outputs ``cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES`` channels.
    """

    def __init__(self, cfg, in_channels):
        super(MaskRCNNConv1x1Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES

        self.mask_fcn_logits = Conv2d(in_channels, num_classes, 1, 1, 0)

        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                # Caffe2's MSRAFill corresponds to kaiming_normal_ in PyTorch.
                nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")

    def forward(self, x):
        """Return mask logits for ``x``."""
        logits = self.mask_fcn_logits(x)
        return logits
53 | 
54 | 
def make_roi_mask_predictor(cfg, in_channels):
    """Build the predictor registered under ``cfg.MODEL.ROI_MASK_HEAD.PREDICTOR``."""
    predictors = registry.ROI_MASK_PREDICTOR
    predictor_name = cfg.MODEL.ROI_MASK_HEAD.PREDICTOR
    return predictors[predictor_name](cfg, in_channels)
58 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/rpn/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | # from .rpn import build_rpn
3 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/rpn/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | """
Utility functions manipulating the prediction layers
 4 | """
 5 | 
 6 | from ..utils import cat
 7 | 
 8 | import torch
 9 | 
def permute_and_flatten(layer, N, A, C, H, W):
    """Rearrange an ``(N, A*C, H, W)`` map into ``(N, H*W*A, C)``.

    The channel axis is split into ``(-1, C)``, the spatial axes are moved in
    front of that pair, and everything but the batch and ``C`` axes is
    flattened. ``A`` is accepted for signature symmetry but is not used; the
    split size is inferred.
    """
    return (
        layer.view(N, -1, C, H, W)
        .permute(0, 3, 4, 1, 2)
        .reshape(N, -1, C)
    )
15 | 
16 | 
def concat_box_prediction_layers(box_cls, box_regression):
    """Flatten per-level class and box predictions into two 2-D tensors.

    Each level is reshaped with ``permute_and_flatten`` so that predictions
    follow the same layout as the labels, which are computed over all
    feature levels concatenated. The per-level results are joined along
    dim 1 and collapsed to ``(-1, C)`` for classes and ``(-1, 4)`` for boxes.
    ``C`` is the value derived from the last level processed.
    """
    cls_per_level = []
    reg_per_level = []
    for cls_map, reg_map in zip(box_cls, box_regression):
        N, AxC, H, W = cls_map.shape
        # The regression map carries 4 values per anchor.
        A = reg_map.shape[1] // 4
        C = AxC // A
        cls_per_level.append(permute_and_flatten(cls_map, N, A, C, H, W))
        reg_per_level.append(permute_and_flatten(reg_map, N, A, 4, H, W))
    # Join along dim 1 (the per-level entries), mirroring how the labels
    # were generated with all feature maps concatenated.
    box_cls = cat(cls_per_level, dim=1).reshape(-1, C)
    box_regression = cat(reg_per_level, dim=1).reshape(-1, 4)
    return box_cls, box_regression
46 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | """
 3 | Miscellaneous utility functions
 4 | """
 5 | 
 6 | import torch
 7 | 
 8 | 
 9 | def cat(tensors, dim=0):
10 |     """
11 |     Efficient version of torch.cat that avoids a copy if there is only a single element in a list
12 |     """
13 |     assert isinstance(tensors, (list, tuple))
14 |     if len(tensors) == 1:
15 |         return tensors[0]
16 |     return torch.cat(tensors, dim)
17 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/vg/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/vg/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/solver/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .build import make_optimizer
3 | from .build import make_lr_scheduler
4 | from .lr_scheduler import WarmupMultiStepLR
5 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/solver/build.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | 
 4 | from .lr_scheduler import WarmupMultiStepLR
 5 | 
 6 | def make_optimizer(cfg, model):
 7 | 
 8 |     params = []
 9 |     for key, value in model.named_parameters():
10 | 
11 |         if not value.requires_grad:
12 |             continue
13 |         print('gradient', key)
14 |         lr = cfg.SOLVER.BASE_LR
15 |         if "body" in key or "head" in key:
16 |             lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.RESNET_LR_FACTOR
17 |         elif "fpn" in key:
18 |             lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.FPN_LR_FACTOR
19 |         elif 'phrase_embed' in key:
20 |             lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.PHRASE_EMBEDDING_LR_FACTOR
21 | 
22 |         # print(key)
23 |         weight_decay = cfg.SOLVER.WEIGHT_DECAY
24 |         # if "bias" in key:
25 |         #     lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR
26 |         #     weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
27 |         if "bias" in key:
28 |             lr = lr * cfg.SOLVER.BIAS_LR_FACTOR
29 |             weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
30 |         params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}]
31 | 
32 |     if cfg.SOLVER.TYPE == "SGD":
33 |         optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM)
34 |     elif cfg.SOLVER.TYPE == 'Adam':
35 |         optimizer = torch.optim.Adam(params, lr)
36 |     else:
37 |         raise NotImplementedError
38 | 
39 |     return optimizer
40 | 
41 | 
def make_lr_scheduler(cfg, optimizer):
    """Wrap ``optimizer`` in a ``WarmupMultiStepLR`` configured from ``cfg.SOLVER``."""
    solver = cfg.SOLVER
    scheduler = WarmupMultiStepLR(
        optimizer,
        solver.STEPS,
        solver.GAMMA,
        warmup_factor=solver.WARMUP_FACTOR,
        warmup_iters=solver.WARMUP_ITERS,
        warmup_method=solver.WARMUP_METHOD,
    )
    return scheduler
51 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/solver/lr_scheduler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from bisect import bisect_right
 3 | 
 4 | import torch
 5 | 
 6 | 
 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler,
 8 | # separating MultiStepLR with WarmupLR
 9 | # but the current LRScheduler design doesn't allow it
class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    """Multi-step learning-rate decay preceded by a warmup phase.

    While ``last_epoch < warmup_iters`` the base rate is scaled by a warmup
    factor: the constant ``warmup_factor`` for ``"constant"``, or a linear
    blend from ``warmup_factor`` towards 1 for ``"linear"``.  Independently
    of warmup, the rate is multiplied by ``gamma`` once for every milestone
    that is less than or equal to ``last_epoch``.

    Arguments:
        optimizer: wrapped optimizer.
        milestones: non-decreasing sequence of step indices.
        gamma: multiplicative decay applied per passed milestone.
        warmup_factor: starting scale during warmup.
        warmup_iters: number of warmup steps.
        warmup_method: ``"constant"`` or ``"linear"``.
        last_epoch: forwarded to the base scheduler.

    Raises:
        ValueError: if ``milestones`` is not sorted or ``warmup_method`` is
            not one of the accepted values.
    """

    def __init__(
        self,
        optimizer,
        milestones,
        gamma=0.1,
        warmup_factor=1.0 / 3,
        warmup_iters=500,
        warmup_method="linear",
        last_epoch=-1,
    ):
        if not list(milestones) == sorted(milestones):
            # Format the value into the message; the original passed the
            # template and the value as two separate exception arguments.
            raise ValueError(
                "Milestones should be a list of increasing integers. "
                "Got {}".format(milestones)
            )

        if warmup_method not in ("constant", "linear"):
            raise ValueError(
                "Only 'constant' or 'linear' warmup_method accepted, "
                "got {}".format(warmup_method)
            )
        # These must be set before the base __init__, which already calls
        # get_lr() once.
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return one learning rate per base rate for the current ``last_epoch``."""
        warmup_factor = 1
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == "constant":
                warmup_factor = self.warmup_factor
            elif self.warmup_method == "linear":
                alpha = float(self.last_epoch) / self.warmup_iters
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
        # bisect_right counts the milestones <= last_epoch.
        return [
            base_lr
            * warmup_factor
            * self.gamma ** bisect_right(self.milestones, self.last_epoch)
            for base_lr in self.base_lrs
        ]
53 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/structures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/structures/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/structures/image_list.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from __future__ import division
 3 | 
 4 | import torch
 5 | 
 6 | 
 7 | class ImageList(object):
 8 |     """
 9 |     Structure that holds a list of images (of possibly
10 |     varying sizes) as a single tensor.
11 |     This works by padding the images to the same size,
12 |     and storing in a field the original sizes of each image
13 |     """
14 | 
15 |     def __init__(self, tensors, image_sizes):
16 |         """
17 |         Arguments:
18 |             tensors (tensor)
19 |             image_sizes (list[tuple[int, int]])
20 |         """
21 |         self.tensors = tensors
22 |         self.image_sizes = image_sizes
23 | 
24 |     def to(self, *args, **kwargs):
25 |         cast_tensor = self.tensors.to(*args, **kwargs)
26 |         return ImageList(cast_tensor, self.image_sizes)
27 | 
28 | 
def to_image_list(tensors, size_divisible=0):
    """
    Build an ImageList from an ImageList, a torch.Tensor or a tuple/list
    of Tensors (numpy arrays are not accepted).

    * An ImageList is returned unchanged.
    * A single Tensor (3-D gets a leading batch axis; must end up 4-D) is
      wrapped as is. When size_divisible > 0 the Tensor is instead treated
      as a one-element list so that it goes through the padding path.
    * A tuple/list of Tensors is zero-padded to the element-wise maximum
      shape; when size_divisible > 0, dimensions 1 and 2 of that shape are
      rounded up to a multiple of size_divisible.

    Raises TypeError for any other input type.
    """
    if isinstance(tensors, torch.Tensor) and size_divisible > 0:
        tensors = [tensors]

    if isinstance(tensors, ImageList):
        return tensors

    if isinstance(tensors, torch.Tensor):
        # the per-image size can be read straight off the batch tensor
        batch = tensors[None] if tensors.dim() == 3 else tensors
        assert batch.dim() == 4
        return ImageList(batch, [image.shape[-2:] for image in batch])

    if not isinstance(tensors, (tuple, list)):
        raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors)))

    # element-wise maximum over the shapes of all images
    padded_shape = [max(extents) for extents in zip(*[img.shape for img in tensors])]

    # TODO: ideally remove this and let the model handle arbitrary
    # input sizes
    if size_divisible > 0:
        import math

        stride = size_divisible
        padded_shape[1] = int(math.ceil(padded_shape[1] / stride) * stride)
        padded_shape[2] = int(math.ceil(padded_shape[2] / stride) * stride)

    batch_shape = (len(tensors),) + tuple(padded_shape)
    batched_imgs = tensors[0].new(*batch_shape).zero_()
    # copy each image into the leading corner of its zero-filled slot
    for img, slot in zip(tensors, batched_imgs):
        slot[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)

    return ImageList(batched_imgs, [img.shape[-2:] for img in tensors])
72 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/structures/relation_triplet.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from .bounding_box import  BoxList
 4 | 
 5 | class RelationTriplet:
 6 |     def __init__(self, instance:BoxList,
 7 |                  pair_mat:torch.Tensor, phrase_label:torch.Tensor, phrase_score:torch.Tensor):
 8 |         """
 9 | 
10 |         :param sub: boxlist
11 |         :param obj: boxlist
12 |         :param pair_mat: shape (connection_num, 2) [sub, obj]
13 |         :param phrase_label: phrase label_id
14 |         :param phrase_score: phrase label_id
15 |         """
16 | 
17 |         self.instance = instance
18 | 
19 |         assert len(pair_mat) == len(phrase_label)
20 |         assert len(phrase_label) == len(phrase_score)
21 |         self.pair_mat = pair_mat
22 |         self.phrase_l = phrase_label
23 |         self.phrase_s = phrase_score
24 | 
25 |         self.extra_fields = {}
26 | 
27 |     def to(self, device):
28 |         triplet = RelationTriplet(self.instance.to(device),
29 |                                    self.pair_mat.to(device),
30 |                                    self.phrase_l.to(device),
31 |                                    self.phrase_s.to(device))
32 | 
33 |         for k, v in self.extra_fields.items():
34 |             if hasattr(v, "to"):
35 |                 v = v.to(device)
36 |             triplet.add_field(k, v)
37 |         return triplet
38 | 
39 | 
40 |     def get_instance_list(self, side):
41 |         assert side in ['sub', 'obj']
42 |         if side == 'sub':
43 |             return self.instance[self.pair_mat[: ,0]]
44 |         else:
45 |             return self.instance[self.pair_mat[:, 1]]
46 | 
47 | 
48 | 
49 |     """
50 |     add extra information to Box
51 |     """
52 |     def add_field(self, field, field_data):
53 |         assert len(field_data) == len(self.pair_mat)
54 |         self.extra_fields[field] = field_data
55 | 
56 |     def get_field(self, field):
57 |         return self.extra_fields[field]
58 | 
59 |     def has_field(self, field):
60 |         return field in self.extra_fields
61 | 
62 |     def fields(self):
63 |         return list(self.extra_fields.keys())
64 | 
65 | 
66 |     def __getitem__(self, item):
67 |         triplet = RelationTriplet(self.instance,
68 |                                    self.pair_mat[item],
69 |                                    self.phrase_l[item],
70 |                                    self.phrase_s[item])
71 |         for k, v in self.extra_fields.items():
72 |             triplet.add_field(k, v[item])
73 |         return triplet
74 | 
75 | 
76 |     def __repr__(self):
77 |         s = self.__class__.__name__ + "("
78 |         s += "num_relation={}, ".format(len(self.pair_mat))
79 |         s += "instance_num={}, ".format(len(self.instance))
80 |         s += ")"
81 |         return s


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/README.md:
--------------------------------------------------------------------------------
1 | # Utility functions
2 | 
3 | This folder contains utility functions that are not used in the
4 | core library, but are useful for building models or training
5 | code using the config system.
6 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/utils/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/collect_env.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import PIL
 3 | 
 4 | from torch.utils.collect_env import get_pretty_env_info
 5 | 
 6 | 
 7 | def get_pil_version():
 8 |     return "\n        Pillow ({})".format(PIL.__version__)
 9 | 
10 | 
def collect_env_info():
    """Return torch's pretty environment report followed by the Pillow version line."""
    return get_pretty_env_info() + get_pil_version()
15 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/cv2_util.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Module for cv2 utility functions and maintaining version compatibility
 3 | between 3.x and 4.x
 4 | """
 5 | import cv2
 6 | 
 7 | 
 8 | def findContours(*args, **kwargs):
 9 |     """
10 |     Wraps cv2.findContours to maintain compatiblity between versions
11 |     3 and 4
12 | 
13 |     Returns:
14 |         contours, hierarchy
15 |     """
16 |     if cv2.__version__.startswith('4'):
17 |         contours, hierarchy = cv2.findContours(*args, **kwargs)
18 |     elif cv2.__version__.startswith('3'):
19 |         _, contours, hierarchy = cv2.findContours(*args, **kwargs)
20 |     else:
21 |         raise AssertionError(
22 |             'cv2 must be either version 3 or 4 to call this method')
23 | 
24 |     return contours, hierarchy
25 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/debugger.py:
--------------------------------------------------------------------------------
1 | import ipdb
2 | 
3 | from maskrcnn_benchmark.config import cfg
4 | 
def set_trace():
    """Start an ipdb session (10 lines of context) when ``cfg.DEBUG`` is truthy; otherwise do nothing."""
    if not cfg.DEBUG:
        return
    ipdb.set_trace(context=10)


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/direction_word_dict.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3.6
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2019-06-16 21:21
 4 | # @Author  : Yongfei Liu
 5 | # @Email   : liuyf3@shanghaitech.edu.cn
 6 | """
 7 | Flickr:
 8 | sent level, left:575, right:645
 9 | phrase level, left:535, right 572
10 | """
11 | 
# Word lists for the two horizontal directions. Despite the ``*_dict`` names
# both objects are plain lists of strings.
# presumably matched against caption tokens by the callers; verify there
left_word_dict = ['left-hand', 'left', 'left-turning', 'camera-left']
# NOTE(review): 'upright' contains "right" but is not obviously a direction
# word; confirm it is meant to be in this list.
right_word_dict = ['right', 'upright', 'right-handed']


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/env.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import os
 3 | 
 4 | from maskrcnn_benchmark.utils.imports import import_file
 5 | 
 6 | 
 7 | def setup_environment():
 8 |     """Perform environment setup work. The default setup is a no-op, but this
 9 |     function allows the user to specify a Python source file that performs
10 |     custom setup work that may be necessary to their computing environment.
11 |     """
12 |     custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE")
13 |     if custom_module_path:
14 |         setup_custom_environment(custom_module_path)
15 |     else:
16 |         # The default setup is a no-op
17 |         pass
18 | 
19 | 
def setup_custom_environment(custom_module_path):
    """Import the Python file at ``custom_module_path`` and call its
    ``setup_environment()`` function.

    The file must define a callable attribute named ``setup_environment``;
    this is checked with an assertion before the call.
    """
    loaded = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path)
    has_hook = hasattr(loaded, "setup_environment") and callable(
        loaded.setup_environment
    )
    assert has_hook, (
        "Custom environment module defined in {} does not have the "
        "required callable attribute 'setup_environment'."
    ).format(custom_module_path)
    loaded.setup_environment()
34 | 
35 | 
# Import-time side effect: importing this module runs setup_environment(),
# so a module named by TORCH_DETECTRON_ENV_MODULE is executed on import.
setup_environment()
38 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/imports.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | 
 4 | if torch._six.PY3:
 5 |     import importlib
 6 |     import importlib.util
 7 |     import sys
 8 | 
 9 | 
10 |     # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
11 |     def import_file(module_name, file_path, make_importable=False):
12 |         spec = importlib.util.spec_from_file_location(module_name, file_path)
13 |         module = importlib.util.module_from_spec(spec)
14 |         spec.loader.exec_module(module)
15 |         if make_importable:
16 |             sys.modules[module_name] = module
17 |         return module
18 | else:
19 |     import imp
20 | 
21 |     def import_file(module_name, file_path, make_importable=None):
22 |         module = imp.load_source(module_name, file_path)
23 |         return module
24 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/metric_logger.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from collections import defaultdict
 3 | from collections import deque
 4 | 
 5 | import torch
 6 | 
 7 | 
 8 | class SmoothedValue(object):
 9 |     """Track a series of values and provide access to smoothed values over a
10 |     window or the global series average.
11 |     """
12 | 
13 |     def __init__(self, window_size=100):
14 |         self.deque = deque(maxlen=window_size)
15 |         self.series = []
16 |         self.total = 0.0
17 |         self.count = 0
18 | 
19 |     def update(self, value):
20 |         self.deque.append(value)
21 |         self.series.append(value)
22 |         self.count += 1
23 |         self.total += value
24 | 
25 |     @property
26 |     def median(self):
27 |         d = torch.tensor(list(self.deque))
28 |         return d.median().item()
29 | 
30 |     @property
31 |     def avg(self):
32 |         d = torch.tensor(list(self.deque))
33 |         return d.mean().item()
34 | 
35 |     @property
36 |     def global_avg(self):
37 |         return self.total / self.count
38 | 
39 | 
class MetricLogger(object):
    """Collection of named meters that are updated with scalar values.

    Meters are created on first use (``defaultdict(SmoothedValue)``) and can
    be read back as attributes, e.g. ``logger.loss``.
    """

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed each keyword value into the meter with the same name.

        Tensor values are converted with ``.item()``; after conversion every
        value must be a float or an int.
        """
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        """Resolve ``attr`` as a meter name once normal lookup has failed.

        Raises:
            AttributeError: if ``attr`` is neither a meter nor an instance
                attribute.
        """
        # Read the instance dict directly. Going through ``self.meters``
        # would re-enter __getattr__ whenever ``meters`` has not been set
        # yet (an instance created without __init__, e.g. while being copied
        # or unpickled) and recurse until RecursionError.
        state = self.__dict__
        meters = state.get("meters", ())
        if attr in meters:
            return meters[attr]
        if attr in state:
            return state[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
                    type(self).__name__, attr))

    def __str__(self):
        """Render one ``name: median (avg)`` line per meter, joined by the delimiter."""
        loss_str = [
            "{}: {:.4f} ({:.4f})\n".format(name, meter.median, meter.avg)
            for name, meter in self.meters.items()
        ]
        return self.delimiter.join(loss_str)
67 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/miscellaneous.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import errno
 3 | import os
 4 | 
 5 | 
 6 | def mkdir(path):
 7 |     try:
 8 |         os.makedirs(path)
 9 |     except OSError as e:
10 |         if e.errno != errno.EEXIST:
11 |             raise
12 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/ops.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3.6
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2019/8/11 15:31
 4 | # @Author  : Yongfei Liu
 5 | # @Email   : liuyf3@shanghaitech.edu.cn
 6 | 
 7 | import numpy as np
 8 | import torch
 9 | import torch.nn as nn
10 | 
class Linear(nn.Linear):
    """``nn.Linear`` whose weight is re-initialised uniformly in
    ``[-sqrt(3 / fan_avg), sqrt(3 / fan_avg)]`` and whose bias (if any) is
    set to zero.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # compatible with xavier_initializer in TensorFlow
        mean_fan = (self.in_features + self.out_features) / 2.
        limit = np.sqrt(3. / mean_fan)
        nn.init.uniform_(self.weight, -limit, limit)

        if self.bias is None:
            return
        nn.init.constant_(self.bias, 0.)
21 | 
22 | 
def apply_mask1d(attention, image_locs):
    """Mask the tail of every row of a 2-D attention tensor.

    ``attention`` is unpacked as (batch_size, num_loc). For row ``b`` every
    entry whose index is ``>= image_locs[b]`` is replaced by -1e30. The
    result is a new tensor (``masked_fill``); the input is left untouched.
    """
    batch_size, num_loc = attention.size()

    # index of every location, repeated for each row
    positions = torch.arange(
        num_loc, dtype=attention.dtype, device=attention.device
    ).expand(batch_size, num_loc)

    # per-row limit, repeated along the location axis
    limits = image_locs.type(attention.type())
    limits = limits.unsqueeze(dim=1).expand(batch_size, num_loc)

    beyond_limit = torch.ge(positions, limits)
    return attention.masked_fill(beyond_limit, -1e30)
35 | 
def apply_mask2d(attention, image_locs):
    """Mask a 3-D attention tensor along both location axes.

    ``attention`` is unpacked as (batch_size, num_loc, _). For batch item
    ``b`` an entry (i, j) is replaced by -1e30 when ``i >= image_locs[b]``
    or ``j >= image_locs[b]``. The result is a new tensor (``masked_fill``).
    """
    batch_size, num_loc, _ = attention.size()

    # index of every location, repeated for each batch item
    positions = torch.arange(
        num_loc, dtype=attention.dtype, device=attention.device
    ).expand(batch_size, num_loc)

    # per-item limit, repeated along the location axis
    limits = image_locs.type(attention.type())
    limits = limits.unsqueeze(dim=1).expand(batch_size, num_loc)

    beyond_limit = torch.ge(positions, limits)
    # an entry is masked as soon as either of its two indices is out of range
    either_beyond = beyond_limit.unsqueeze(1) | beyond_limit.unsqueeze(2)
    return attention.masked_fill(either_beyond, -1e30)


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | 
 4 | def _register_generic(module_dict, module_name, module):
 5 |     assert module_name not in module_dict
 6 |     module_dict[module_name] = module
 7 | 
 8 | 
 9 | class Registry(dict):
10 |     '''
11 |     A helper class for managing registering modules, it extends a dictionary
12 |     and provides a register functions.
13 | 
14 |     Eg. creeting a registry:
15 |         some_registry = Registry({"default": default_module})
16 | 
17 |     There're two ways of registering new modules:
18 |     1): normal way is just calling register function:
19 |         def foo():
20 |             ...
21 |         some_registry.register("foo_module", foo)
22 |     2): used as decorator when declaring the module:
23 |         @some_registry.register("foo_module")
24 |         @some_registry.register("foo_modeul_nickname")
25 |         def foo():
26 |             ...
27 | 
28 |     Access of module is just like using a dictionary, eg:
29 |         f = some_registry["foo_modeul"]
30 |     '''
31 |     def __init__(self, *args, **kwargs):
32 |         super(Registry, self).__init__(*args, **kwargs)
33 | 
34 |     def register(self, module_name, module=None):
35 |         # used as function call
36 |         if module is not None:
37 |             _register_generic(self, module_name, module)
38 |             return
39 | 
40 |         # used as decorator
41 |         def register_fn(fn):
42 |             _register_generic(self, module_name, fn)
43 |             return fn
44 | 
45 |         return register_fn
46 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/timer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | 
 4 | import datetime
 5 | import time
 6 | 
 7 | import torch
 8 | 
 9 | 
class Timer(object):
    """Stopwatch that accumulates measured durations and their count."""

    def __init__(self):
        self.reset()

    @property
    def average_time(self):
        """Mean recorded duration, or 0.0 when nothing was recorded yet."""
        if self.calls > 0:
            return self.total_time / self.calls
        return 0.0

    def tic(self):
        """Remember the current wall-clock time as the start of a measurement."""
        # time.time is used instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """Record the time since the last tic().

        Returns the running average when ``average`` is true, otherwise the
        duration just measured.
        """
        elapsed = time.time() - self.start_time
        self.add(elapsed)
        return self.average_time if average else self.diff

    def add(self, time_diff):
        """Record an externally measured duration."""
        self.diff = time_diff
        self.total_time += self.diff
        self.calls += 1

    def reset(self):
        """Clear all accumulated state."""
        self.total_time = 0.0
        self.calls = 0
        self.start_time = 0.0
        self.diff = 0.0

    def avg_time_str(self):
        """Return the average duration formatted as a ``datetime.timedelta`` string."""
        return str(datetime.timedelta(seconds=self.average_time))
44 | 
45 | 
def get_time_str(time_diff):
    """Format a duration given in seconds as a ``datetime.timedelta`` string."""
    return str(datetime.timedelta(seconds=time_diff))
49 | 
50 | 
class SimpleTimer(object):
    """A simple timer that synchronizes CUDA before reading the clock."""

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        """Remember the current wall-clock time as the start of a measurement."""
        # time.time is used instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, event="", average=False):
        """Record the time elapsed since the last tic().

        Arguments:
            event (str): label prepended to the printed message.
            average (bool): when true, return the running average and print
                nothing; otherwise print and return the last duration.
        """
        # Synchronize only when CUDA is usable: on a CPU-only machine an
        # unconditional torch.cuda.synchronize() raises instead of timing.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        if average:
            return self.average_time
        else:
            print("%-28s %f" % (event + "cost time:", self.diff))
            return self.diff
77 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ninja
2 | yacs
3 | cython
4 | matplotlib
5 | tqdm
6 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #!/usr/bin/env python
 3 | 
 4 | import glob
 5 | import os
 6 | 
 7 | import torch
 8 | from setuptools import find_packages
 9 | from setuptools import setup
10 | from torch.utils.cpp_extension import CUDA_HOME
11 | from torch.utils.cpp_extension import CppExtension
12 | from torch.utils.cpp_extension import CUDAExtension
13 | 
14 | requirements = ["torch", "torchvision"]
15 | 
16 | 
def get_extensions():
    """Describe the ``maskrcnn_benchmark._C`` extension for ``setup()``.

    Sources are globbed from ``maskrcnn_benchmark/csrc`` next to this file.
    A CUDAExtension (with the ``.cu`` sources, the WITH_CUDA macro and nvcc
    flags) is built when CUDA is available and CUDA_HOME is set; otherwise a
    CppExtension with the C++ sources only.

    Returns a one-element list holding the extension object.
    """
    root = os.path.dirname(os.path.abspath(__file__))
    csrc_dir = os.path.join(root, "maskrcnn_benchmark", "csrc")

    def _find(*pattern):
        # glob relative to the csrc directory
        return glob.glob(os.path.join(csrc_dir, *pattern))

    entry_points = _find("*.cpp")
    cpu_sources = _find("cpu", "*.cpp")
    cuda_sources = _find("cuda", "*.cu")

    if torch.cuda.is_available() and CUDA_HOME is not None:
        extension_cls = CUDAExtension
        sources = entry_points + cpu_sources + cuda_sources
        define_macros = [("WITH_CUDA", None)]
        extra_compile_args = {
            "cxx": [],
            "nvcc": [
                "-DCUDA_HAS_FP16=1",
                "-D__CUDA_NO_HALF_OPERATORS__",
                "-D__CUDA_NO_HALF_CONVERSIONS__",
                "-D__CUDA_NO_HALF2_OPERATORS__",
            ],
        }
    else:
        extension_cls = CppExtension
        sources = entry_points + cpu_sources
        define_macros = []
        extra_compile_args = {"cxx": []}

    sources = [os.path.join(csrc_dir, path) for path in sources]

    return [
        extension_cls(
            "maskrcnn_benchmark._C",
            sources,
            include_dirs=[csrc_dir],
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        )
    ]
57 | 
58 | 
# Package definition. Running this script evaluates get_extensions()
# immediately, and the "build_ext" command is delegated to torch's
# BuildExtension.
setup(
    name="maskrcnn_benchmark",
    version="0.1",
    author="fmassa",
    url="https://github.com/facebookresearch/maskrcnn-benchmark",
    description="object detection in pytorch",
    # the "configs" and "tests" directories are not packaged
    packages=find_packages(exclude=("configs", "tests",)),
    # Disabled: the module-level `requirements` list is therefore unused.
    # install_requires=requirements,
    ext_modules=get_extensions(),
    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
)
70 | 


--------------------------------------------------------------------------------
/skipthoughts/__init__.py:
--------------------------------------------------------------------------------
 1 | from .version import __version__
 2 | 
 3 | from .skipthoughts import AbstractSkipThoughts
 4 | 
 5 | from .skipthoughts import AbstractUniSkip
 6 | from .skipthoughts import UniSkip
 7 | from .skipthoughts import DropUniSkip
 8 | from .skipthoughts import BayesianUniSkip
 9 | 
10 | from .skipthoughts import AbstractBiSkip
11 | from .skipthoughts import BiSkip
12 | 
13 | from .gru import AbstractGRUCell
14 | from .gru import GRUCell
15 | from .gru import BayesianGRUCell
16 | 
17 | from .gru import AbstractGRU
18 | from .gru import GRU
19 | from .gru import BayesianGRU
20 | 
21 | from .dropout import EmbeddingDropout
22 | from .dropout import SequentialDropout


--------------------------------------------------------------------------------
/skipthoughts/version.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.0.0'


--------------------------------------------------------------------------------
/tests/env_tests/env.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import os
 4 | 
 5 | 
 6 | def get_config_root_path():
 7 |     ''' Path to configs for unit tests '''
 8 |     # cur_file_dir is root/tests/env_tests
 9 |     cur_file_dir = os.path.dirname(os.path.abspath(os.path.realpath(__file__)))
10 |     ret = os.path.dirname(os.path.dirname(cur_file_dir))
11 |     ret = os.path.join(ret, "configs")
12 |     return ret
13 | 


--------------------------------------------------------------------------------
/tests/test_backbones.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import unittest
 4 | import copy
 5 | import torch
 6 | # import modules to to register backbones
 7 | from maskrcnn_benchmark.modeling.backbone import build_backbone # NoQA
 8 | from maskrcnn_benchmark.modeling import registry
 9 | from maskrcnn_benchmark.config import cfg as g_cfg
10 | from .utils import load_config
11 | 
12 | 
# Maps a backbone name to the config file the test loads for it; backbones
# not listed here are built with a deep copy of the default config.
# NOTE(review): "R-152-FPN" points at the R_101 config file; confirm this
# reuse is intentional.
BACKBONE_CFGS = {
    "R-50-FPN": "e2e_faster_rcnn_R_50_FPN_1x.yaml",
    "R-101-FPN": "e2e_faster_rcnn_R_101_FPN_1x.yaml",
    "R-152-FPN": "e2e_faster_rcnn_R_101_FPN_1x.yaml",
    "R-50-FPN-RETINANET": "retinanet/retinanet_R-50-FPN_1x.yaml",
    "R-101-FPN-RETINANET": "retinanet/retinanet_R-101-FPN_1x.yaml",
}
21 | 
22 | 
class TestBackbones(unittest.TestCase):
    def test_build_backbones(self):
        ''' Make sure backbones run '''

        self.assertGreater(len(registry.BACKBONES), 0)

        for name, backbone_builder in registry.BACKBONES.items():
            print('Testing {}...'.format(name))
            # Use the dedicated config when one is listed, else the default.
            cfg = (
                load_config(BACKBONE_CFGS[name])
                if name in BACKBONE_CFGS
                else copy.deepcopy(g_cfg)
            )
            backbone = backbone_builder(cfg)

            # every backbone has to expose `out_channels`
            self.assertIsNotNone(
                getattr(backbone, 'out_channels', None),
                'Need to provide out_channels for backbone {}'.format(name)
            )

            N, C_in, H, W = 2, 3, 224, 256
            images = torch.rand([N, C_in, H, W], dtype=torch.float32)
            expected_prefix = torch.Size([N, backbone.out_channels])
            # each output must keep the batch size and use out_channels
            for feature_map in backbone(images):
                self.assertEqual(feature_map.shape[:2], expected_prefix)
53 | 
54 | if __name__ == "__main__":
55 |     unittest.main()
56 | 


--------------------------------------------------------------------------------
/tests/test_configs.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import unittest
 4 | import glob
 5 | import os
 6 | import utils
 7 | 
 8 | 
 9 | class TestConfigs(unittest.TestCase):
10 |     def test_configs_load(self):
11 |         ''' Make sure configs are loadable '''
12 | 
13 |         cfg_root_path = utils.get_config_root_path()
14 |         files = glob.glob(
15 |             os.path.join(cfg_root_path, "./**/*.yaml"), recursive=True)
16 |         self.assertGreater(len(files), 0)
17 | 
18 |         for fn in files:
19 |             print('Loading {}...'.format(fn))
20 |             utils.load_config_from_file(fn)
21 | 
22 | 
23 | if __name__ == "__main__":
24 |     unittest.main()
25 | 


--------------------------------------------------------------------------------
/tests/test_metric_logger.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import unittest
 3 | 
 4 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger
 5 | 
 6 | 
 7 | class TestMetricLogger(unittest.TestCase):
 8 |     def test_update(self):
 9 |         meter = MetricLogger()
10 |         for i in range(10):
11 |             meter.update(metric=float(i))
12 |         
13 |         m = meter.meters["metric"]
14 |         self.assertEqual(m.count, 10)
15 |         self.assertEqual(m.total, 45)
16 |         self.assertEqual(m.median, 4)
17 |         self.assertEqual(m.avg, 4.5)
18 | 
19 |     def test_no_attr(self):
20 |         meter = MetricLogger()
21 |         _ = meter.meters
22 |         _ = meter.delimiter
23 |         def broken():
24 |             _ = meter.not_existent
25 |         self.assertRaises(AttributeError, broken)
26 | 
27 | if __name__ == "__main__":
28 |     unittest.main()
29 | 


--------------------------------------------------------------------------------
/tests/test_rpn_heads.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import unittest
 4 | import copy
 5 | import torch
 6 | # import modules to to register rpn heads
 7 | from maskrcnn_benchmark.modeling.backbone import build_backbone # NoQA
 8 | from maskrcnn_benchmark.modeling.rpn.rpn import build_rpn # NoQA
 9 | from maskrcnn_benchmark.modeling import registry
10 | from maskrcnn_benchmark.config import cfg as g_cfg
11 | from utils import load_config
12 | 
13 | 
# Maps an RPN head name to the config file the test loads for it. It is
# empty, so every head is built with a deep copy of the default config.
RPN_CFGS = {
}
17 | 
18 | 
19 | class TestRPNHeads(unittest.TestCase):  # smoke test over every entry in registry.RPN_HEADS
20 |     def test_build_rpn_heads(self):
21 |         ''' Build every registered rpn head, run it on random input and check the output shapes '''
22 | 
23 |         self.assertGreater(len(registry.RPN_HEADS), 0)  # at least one head must be registered
24 | 
25 |         in_channels = 64
26 |         num_anchors = 10
27 | 
28 |         for name, builder in registry.RPN_HEADS.items():
29 |             print('Testing {}...'.format(name))
30 |             if name in RPN_CFGS:  # a head-specific config file was listed for this head
31 |                 cfg = load_config(RPN_CFGS[name])
32 |             else:
33 |                 # Use default config if config file is not specified
34 |                 cfg = copy.deepcopy(g_cfg)  # copy, so the global default config is left untouched
35 | 
36 |             rpn = builder(cfg, in_channels, num_anchors)
37 | 
38 |             N, C_in, H, W = 2, in_channels, 24, 32
39 |             input = torch.rand([N, C_in, H, W], dtype=torch.float32)  # random float32 tensor of shape [N, C_in, H, W]
40 |             LAYERS = 3
41 |             out = rpn([input] * LAYERS)  # the head receives a list holding the same tensor LAYERS times
42 |             self.assertEqual(len(out), 2)  # output is a pair, unpacked below
43 |             logits, bbox_reg = out
44 |             for idx in range(LAYERS):  # one output entry is checked per input list element
45 |                 self.assertEqual(
46 |                     logits[idx].shape,
47 |                     torch.Size([  # expected: [N, num_anchors, H, W]
48 |                         input.shape[0], num_anchors,
49 |                         input.shape[2], input.shape[3],
50 |                     ])
51 |                 )
52 |                 self.assertEqual(
53 |                     bbox_reg[idx].shape,
54 |                     torch.Size([  # expected: same as logits but with num_anchors * 4 channels
55 |                         logits[idx].shape[0], num_anchors * 4,
56 |                         logits[idx].shape[2], logits[idx].shape[3],
57 |                     ]),
58 |                 )
59 | 
60 | 
61 | if __name__ == "__main__":  # only when this file is executed directly, not when imported
62 |     unittest.main()
63 | 


--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function, unicode_literals
 2 | 
 3 | # Set up custom environment before nearly anything else is imported
 4 | # NOTE: this should be the first import (do not reorder)
 5 | from maskrcnn_benchmark.utils.env import setup_environment  # noqa F401 isort:skip
 6 | import env_tests.env as env_tests
 7 | 
 8 | import os
 9 | import copy
10 | 
11 | from maskrcnn_benchmark.config import cfg as g_cfg
12 | 
13 | 
14 | def get_config_root_path():  # thin wrapper: the value comes straight from env_tests
15 |     return env_tests.get_config_root_path()
16 | 
17 | 
18 | def load_config(rel_path):
19 |     ''' Load the config file at rel_path, a path relative to config_root, and return the resulting config '''
20 |     cfg_path = os.path.join(env_tests.get_config_root_path(), rel_path)  # config_root + rel_path
21 |     return load_config_from_file(cfg_path)
22 | 
23 | 
24 | def load_config_from_file(file_path):
25 |     ''' Return a copy of the global default config with the file at file_path (absolute path) merged in '''
26 |     ret = copy.deepcopy(g_cfg)  # work on a copy so the shared g_cfg is not modified by the merge
27 |     ret.merge_from_file(file_path)
28 |     return ret
29 | 


--------------------------------------------------------------------------------