├── ABSTRACTIONS.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── INSTALL.md
├── LICENSE
├── README.md
├── config.yml
├── configs
    ├── caffe2
    │   ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml
    │   ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml
    │   ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml
    │   ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
    │   ├── e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml
    │   ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml
    │   ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml
    │   ├── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml
    │   └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
    ├── cityscapes
    │   ├── README.md
    │   ├── e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml
    │   └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
    ├── dcn
    │   ├── README.md
    │   ├── e2e_faster_rcnn_dconv_R_50_FPN_1x.yaml
    │   ├── e2e_faster_rcnn_mdconv_R_50_FPN_1x.yaml
    │   ├── e2e_mask_rcnn_dconv_R_50_FPN_1x.yaml
    │   └── e2e_mask_rcnn_mdconv_R_50_FPN_1x.yaml
    ├── e2e_faster_rcnn_R_101_FPN_1x.yaml
    ├── e2e_faster_rcnn_R_50_C4_1x.yaml
    ├── e2e_faster_rcnn_R_50_FPN_1x.yaml
    ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml
    ├── e2e_faster_rcnn_fbnet.yaml
    ├── e2e_faster_rcnn_fbnet_600.yaml
    ├── e2e_faster_rcnn_fbnet_chamv1a_600.yaml
    ├── e2e_keypoint_rcnn_R_50_FPN_1x.yaml
    ├── e2e_mask_rcnn_R_101_FPN_1x.yaml
    ├── e2e_mask_rcnn_R_50_C4_1x.yaml
    ├── e2e_mask_rcnn_R_50_FPN_1x.yaml
    ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml
    ├── e2e_mask_rcnn_fbnet.yaml
    ├── e2e_mask_rcnn_fbnet_600.yaml
    ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml
    ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml
    ├── gn_baselines
    │   ├── README.md
    │   ├── e2e_faster_rcnn_R_50_FPN_1x_gn.yaml
    │   ├── e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_1x_gn.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
    │   ├── scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml
    │   ├── scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
    │   ├── scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml
    │   └── scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
    ├── pascal_voc
    │   ├── e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml
    │   ├── e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml
    │   ├── e2e_faster_rcnn_R_50_exp01.yaml
    │   ├── e2e_faster_rcnn_R_50_exp02_semi.yaml
    │   └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
    ├── quick_schedules
    │   ├── e2e_faster_rcnn_R_50_C4_quick.yaml
    │   ├── e2e_faster_rcnn_R_50_FPN_quick.yaml
    │   ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml
    │   ├── e2e_keypoint_rcnn_R_50_FPN_quick.yaml
    │   ├── e2e_mask_rcnn_R_50_C4_quick.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_quick.yaml
    │   ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml
    │   ├── rpn_R_50_C4_quick.yaml
    │   └── rpn_R_50_FPN_quick.yaml
    ├── retinanet
    │   ├── retinanet_R-101-FPN_1x.yaml
    │   ├── retinanet_R-101-FPN_P5_1x.yaml
    │   ├── retinanet_R-50-FPN_1x.yaml
    │   ├── retinanet_R-50-FPN_1x_quick.yaml
    │   ├── retinanet_R-50-FPN_P5_1x.yaml
    │   └── retinanet_X_101_32x8d_FPN_1x.yaml
    ├── rpn_R_101_FPN_1x.yaml
    ├── rpn_R_50_C4_1x.yaml
    ├── rpn_R_50_FPN_1x.yaml
    ├── rpn_X_101_32x8d_FPN_1x.yaml
    └── test_time_aug
    │   └── e2e_mask_rcnn_R_50_FPN_1x.yaml
├── datasets
    └── voc
    │   └── VOC2007
├── demo
    ├── Mask_R-CNN_demo.ipynb
    ├── README.md
    ├── demo_e2e_mask_rcnn_R_50_FPN_1x.png
    ├── demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png
    ├── panoptic_segmentation_shapes_dataset_demo.ipynb
    ├── predictor.py
    ├── shapes_dataset_demo.ipynb
    ├── shapes_pruning.ipynb
    └── webcam.py
├── docker
    ├── Dockerfile
    └── docker-jupyter
    │   ├── Dockerfile
    │   └── jupyter_notebook_config.py
├── inference
    └── voc_2007_test
    │   ├── predictions.pth
    │   └── result.txt
├── maskrcnn_benchmark
    ├── _C.cpython-36m-x86_64-linux-gnu.so
    ├── __init__.py
    ├── config
    │   ├── __init__.py
    │   ├── defaults.py
    │   └── paths_catalog.py
    ├── csrc
    │   ├── ROIAlign.h
    │   ├── ROIPool.h
    │   ├── SigmoidFocalLoss.h
    │   ├── cpu
    │   │   ├── ROIAlign_cpu.cpp
    │   │   ├── nms_cpu.cpp
    │   │   ├── soft_nms_cpu.cpp
    │   │   └── vision.h
    │   ├── cuda
    │   │   ├── ROIAlign_cuda.cu
    │   │   ├── ROIPool_cuda.cu
    │   │   ├── SigmoidFocalLoss_cuda.cu
    │   │   ├── deform_conv_cuda.cu
    │   │   ├── deform_conv_kernel_cuda.cu
    │   │   ├── deform_pool_cuda.cu
    │   │   ├── deform_pool_kernel_cuda.cu
    │   │   ├── nms.cu
    │   │   └── vision.h
    │   ├── deform_conv.h
    │   ├── deform_pool.h
    │   ├── nms.h
    │   ├── soft_nms.h
    │   └── vision.cpp
    ├── data
    │   ├── README.md
    │   ├── __init__.py
    │   ├── build.py
    │   ├── build_semi.py
    │   ├── collate_batch.py
    │   ├── datasets
    │   │   ├── __init__.py
    │   │   ├── coco.py
    │   │   ├── coco_origin.py
    │   │   ├── concat_dataset.py
    │   │   ├── evaluation
    │   │   │   ├── __init__.py
    │   │   │   ├── coco
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── coco_eval.py
    │   │   │   └── voc
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── voc_eval.py
    │   │   ├── list_dataset.py
    │   │   ├── unlabeled_img.py
    │   │   └── voc.py
    │   ├── samplers
    │   │   ├── __init__.py
    │   │   ├── distributed.py
    │   │   ├── grouped_batch_sampler.py
    │   │   └── iteration_based_batch_sampler.py
    │   └── transforms
    │   │   ├── __init__.py
    │   │   ├── build.py
    │   │   ├── transforms.py
    │   │   └── transforms_reverse.py
    ├── engine
    │   ├── __init__.py
    │   ├── bbox_aug.py
    │   ├── inference.py
    │   ├── trainer.py
    │   └── trainer_semi.py
    ├── layers
    │   ├── __init__.py
    │   ├── _utils.py
    │   ├── batch_norm.py
    │   ├── dcn
    │   │   ├── __init__.py
    │   │   ├── deform_conv_func.py
    │   │   ├── deform_conv_module.py
    │   │   ├── deform_pool_func.py
    │   │   └── deform_pool_module.py
    │   ├── misc.py
    │   ├── nms.py
    │   ├── roi_align.py
    │   ├── roi_pool.py
    │   ├── sigmoid_focal_loss.py
    │   └── smooth_l1_loss.py
    ├── modeling
    │   ├── __init__.py
    │   ├── backbone
    │   │   ├── __init__.py
    │   │   ├── backbone.py
    │   │   ├── fbnet.py
    │   │   ├── fbnet_builder.py
    │   │   ├── fbnet_modeldef.py
    │   │   ├── fpn.py
    │   │   └── resnet.py
    │   ├── balanced_positive_negative_sampler.py
    │   ├── box_coder.py
    │   ├── detector
    │   │   ├── __init__.py
    │   │   ├── detectors.py
    │   │   └── generalized_rcnn.py
    │   ├── make_layers.py
    │   ├── matcher.py
    │   ├── poolers.py
    │   ├── registry.py
    │   ├── roi_heads
    │   │   ├── __init__.py
    │   │   ├── box_head
    │   │   │   ├── __init__.py
    │   │   │   ├── box_head.py
    │   │   │   ├── inference.py
    │   │   │   ├── loss.py
    │   │   │   ├── roi_box_feature_extractors.py
    │   │   │   └── roi_box_predictors.py
    │   │   ├── keypoint_head
    │   │   │   ├── __init__.py
    │   │   │   ├── inference.py
    │   │   │   ├── keypoint_head.py
    │   │   │   ├── loss.py
    │   │   │   ├── roi_keypoint_feature_extractors.py
    │   │   │   └── roi_keypoint_predictors.py
    │   │   ├── mask_head
    │   │   │   ├── __init__.py
    │   │   │   ├── inference.py
    │   │   │   ├── loss.py
    │   │   │   ├── mask_head.py
    │   │   │   ├── roi_mask_feature_extractors.py
    │   │   │   └── roi_mask_predictors.py
    │   │   └── roi_heads.py
    │   ├── rpn
    │   │   ├── __init__.py
    │   │   ├── anchor_generator.py
    │   │   ├── inference.py
    │   │   ├── loss.py
    │   │   ├── retinanet
    │   │   │   ├── __init__.py
    │   │   │   ├── inference.py
    │   │   │   ├── loss.py
    │   │   │   └── retinanet.py
    │   │   ├── rpn.py
    │   │   └── utils.py
    │   └── utils.py
    ├── semi
    │   └── __init__.py
    ├── solver
    │   ├── __init__.py
    │   ├── build.py
    │   └── lr_scheduler.py
    ├── structures
    │   ├── __init__.py
    │   ├── bounding_box.py
    │   ├── boxlist_ops.py
    │   ├── image_list.py
    │   ├── keypoint.py
    │   └── segmentation_mask.py
    └── utils
    │   ├── README.md
    │   ├── __init__.py
    │   ├── c2_model_loading.py
    │   ├── checkpoint.py
    │   ├── collect_env.py
    │   ├── comm.py
    │   ├── cv2_util.py
    │   ├── env.py
    │   ├── imports.py
    │   ├── logger.py
    │   ├── metric_logger.py
    │   ├── miscellaneous.py
    │   ├── model_serialization.py
    │   ├── model_zoo.py
    │   ├── registry.py
    │   └── timer.py
├── requirements.txt
├── semi_test
    ├── __init__.py
    ├── coco_eval.py
    ├── curriculum_sel.py
    ├── cython_nms.pyx
    ├── disk_clear.py
    ├── e2e_semi.yaml
    ├── inference_ens.py
    ├── post_process.py
    ├── retina_voc.yaml
    ├── retina_voc_paper.yaml
    ├── retina_voc_semi.yaml
    ├── retinanet_R-101-FPN_1.5x.yaml
    ├── retinanet_R-101-FPN_1.5x_semi.yaml
    ├── retinanet_R-101-FPN_1x_unlabeled.yaml
    ├── retinanet_R-50-FPN_1x_coco_unlabeled.yaml
    ├── retinanet_R-50-FPN_1x_semi.yaml
    ├── retinanet_R-50-FPN_1x_semi_resize.yaml
    ├── retinanet_R-50-FPN_1x_semi_step2.yaml
    ├── retinanet_R-50-FPN_val.yaml
    ├── retinanet_R-50-FPN_val_semi.yaml
    ├── retinanet_X_101_32x8d_FPN_1x_unlableled.yaml
    ├── semi_loss.py
    ├── tempor_ens_cp.py
    ├── temporal_ens_scores.py
    ├── temporal_ens_test.py
    ├── test_net_ens.py
    └── test_units.py
├── setup.py
├── tests
    ├── checkpoint.py
    ├── env_tests
    │   └── env.py
    ├── test_backbones.py
    ├── test_box_coder.py
    ├── test_configs.py
    ├── test_data_samplers.py
    ├── test_detectors.py
    ├── test_fbnet.py
    ├── test_feature_extractors.py
    ├── test_metric_logger.py
    ├── test_nms.py
    ├── test_predictors.py
    ├── test_rpn_heads.py
    ├── test_segmentation_mask.py
    └── utils.py
└── tools
    ├── cityscapes
        ├── convert_cityscapes_to_coco.py
        └── instances2dict_with_polygons.py
    ├── config.yml
    ├── datasets
        └── voc
        │   ├── VOC2007
        │   └── VOC2012
    ├── test_net.py
    ├── test_net_model_info_coll.py
    ├── train_net.py
    └── train_net_semi.py


/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 | 
3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
4 | Please read the [full text](https://code.fb.com/codeofconduct/)
5 | so that you can understand what actions will and will not be tolerated.
6 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to Mask-RCNN Benchmark
 2 | We want to make contributing to this project as easy and transparent as
 3 | possible.
 4 | 
 5 | ## Our Development Process
 6 | Minor changes and improvements will be released on an ongoing basis. Larger changes (e.g., changesets implementing a new paper) will be released on a more periodic basis.
 7 | 
 8 | ## Pull Requests
 9 | We actively welcome your pull requests.
10 | 
11 | 1. Fork the repo and create your branch from `master`.
12 | 2. If you've added code that should be tested, add tests.
13 | 3. If you've changed APIs, update the documentation.
14 | 4. Ensure the test suite passes.
15 | 5. Make sure your code lints.
16 | 6. If you haven't already, complete the Contributor License Agreement ("CLA").
17 | 
18 | ## Contributor License Agreement ("CLA")
19 | In order to accept your pull request, we need you to submit a CLA. You only need
20 | to do this once to work on any of Facebook's open source projects.
21 | 
22 | Complete your CLA here: <https://code.facebook.com/cla>
23 | 
24 | ## Issues
25 | We use GitHub issues to track public bugs. Please ensure your description is
26 | clear and has sufficient instructions to be able to reproduce the issue.
27 | 
28 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
29 | disclosure of security bugs. In those cases, please go through the process
30 | outlined on that page and do not file a public issue.
31 | 
32 | ## Coding Style  
33 | * 4 spaces for indentation rather than tabs
34 | * 80 character line length
35 | * PEP8 formatting following [Black](https://black.readthedocs.io/en/stable/)
36 | 
37 | ## License
38 | By contributing to Mask-RCNN Benchmark, you agree that your contributions will be licensed
39 | under the LICENSE file in the root directory of this source tree.
40 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Facebook
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Temporal Self-Ensembling Teacher for Semi-Supervised Object Detection
 2 | 
 3 | 
 4 | This repository contains the code for [Temporal Self-Ensembling Teacher for Semi-Supervised Object Detection](https://arxiv.org/abs/2007.06144), by Cong Chen, Shouyang Dong, Ye Tian, Kunlin Cao, Li Liu, and Yuanhao Guo (arXiv:2007.06144).
 5 | 
 6 | If you use the code in this repository for a published research project, please cite this paper.
 7 | 
 8 | The code is designed to run on PyTorch and Python using the dependencies listed in requirements.txt. You can install the dependencies by running `pip install -r requirements.txt`.
 9 | For the packages required by the underlying detection framework, please follow the installation instructions in INSTALL.md.
10 | 
11 | ## Introduction
12 | We propose a novel method, Temporal Self-Ensembling Teacher (TSE-T), for semi-supervised object detection (SSOD). Unlike previous knowledge-distillation (KD) based methods, we devise a temporally evolved teacher model. First, our teacher model ensembles its temporal predictions for unlabeled images under stochastic perturbations. Second, our teacher model ensembles its temporal model weights with the student model weights by an exponential moving average (EMA), which allows the teacher to gradually learn from the student. These self-ensembling strategies increase data and model diversity, thus improving teacher predictions on unlabeled images. Finally, we use focal loss to formulate the consistency regularization term to handle the data imbalance problem, which is a more efficient way to utilize the useful information from unlabeled images than a simple hard-thresholding method that solely preserves confident predictions.
13 | 
14 | ## Reference 
15 | The RetinaNet network is based on the reference implementation at https://github.com/facebookresearch/Detectron
16 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 |   TEST: ("coco_2014_minival",)
25 | DATALOADER:
26 |   SIZE_DIVISIBILITY: 32
27 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x"
4 | DATASETS:
5 |   TEST: ("coco_2014_minival",)
6 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 |   TEST: ("coco_2014_minival",)
25 | DATALOADER:
26 |   SIZE_DIVISIBILITY: 32
27 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |   ROI_BOX_HEAD:
21 |     POOLER_RESOLUTION: 7
22 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 |     POOLER_SAMPLING_RATIO: 2
24 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 |     PREDICTOR: "FPNPredictor"
26 | DATASETS:
27 |   TEST: ("coco_2014_minival",)
28 | DATALOADER:
29 |   SIZE_DIVISIBILITY: 32
30 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/37697547/e2e_keypoint_rcnn_R-50-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 2
24 |   ROI_KEYPOINT_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
27 |     PREDICTOR: "KeypointRCNNPredictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 56
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   KEYPOINT_ON: True
33 | DATASETS:
34 |   TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",)
35 |   TEST: ("keypoints_coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
38 | DATALOADER:
39 |   SIZE_DIVISIBILITY: 32
40 | SOLVER:
41 |   BASE_LR: 0.02
42 |   WEIGHT_DECAY: 0.0001
43 |   STEPS: (60000, 80000)
44 |   MAX_ITER: 90000
45 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |   ROI_MASK_HEAD:
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 |     PREDICTOR: "MaskRCNNC4Predictor"
27 |     POOLER_RESOLUTION: 14
28 |     POOLER_SAMPLING_RATIO: 2
29 |     RESOLUTION: 28
30 |     SHARE_BOX_FEATURE_EXTRACTOR: False
31 |   MASK_ON: True
32 | DATASETS:
33 |   TEST: ("coco_2014_minival",)
34 | DATALOADER:
35 |   SIZE_DIVISIBILITY: 32
36 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x"
 4 |   ROI_MASK_HEAD:
 5 |     PREDICTOR: "MaskRCNNC4Predictor"
 6 |     SHARE_BOX_FEATURE_EXTRACTOR: True
 7 |   MASK_ON: True
 8 | DATASETS:
 9 |   TEST: ("coco_2014_minival",)
10 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |   ROI_MASK_HEAD:
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 |     PREDICTOR: "MaskRCNNC4Predictor"
27 |     POOLER_RESOLUTION: 14
28 |     POOLER_SAMPLING_RATIO: 2
29 |     RESOLUTION: 28
30 |     SHARE_BOX_FEATURE_EXTRACTOR: False
31 |   MASK_ON: True
32 | DATASETS:
33 |   TEST: ("coco_2014_minival",)
34 | DATALOADER:
35 |   SIZE_DIVISIBILITY: 32
36 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/37129812/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-152-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |   ROI_BOX_HEAD:
21 |     POOLER_RESOLUTION: 7
22 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 |     POOLER_SAMPLING_RATIO: 2
24 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 |     PREDICTOR: "FPNPredictor"
26 |   ROI_MASK_HEAD:
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
29 |     PREDICTOR: "MaskRCNNC4Predictor"
30 |     POOLER_RESOLUTION: 14
31 |     POOLER_SAMPLING_RATIO: 2
32 |     RESOLUTION: 28
33 |     SHARE_BOX_FEATURE_EXTRACTOR: False
34 |   MASK_ON: True
35 | DATASETS:
36 |   TEST: ("coco_2014_minival",)
37 | DATALOADER:
38 |   SIZE_DIVISIBILITY: 32
39 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |   ROI_BOX_HEAD:
21 |     POOLER_RESOLUTION: 7
22 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 |     POOLER_SAMPLING_RATIO: 2
24 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 |     PREDICTOR: "FPNPredictor"
26 |   ROI_MASK_HEAD:
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
29 |     PREDICTOR: "MaskRCNNC4Predictor"
30 |     POOLER_RESOLUTION: 14
31 |     POOLER_SAMPLING_RATIO: 2
32 |     RESOLUTION: 28
33 |     SHARE_BOX_FEATURE_EXTRACTOR: False
34 |   MASK_ON: True
35 | DATASETS:
36 |   TEST: ("coco_2014_minival",)
37 | DATALOADER:
38 |   SIZE_DIVISIBILITY: 32
39 | 


--------------------------------------------------------------------------------
/configs/cityscapes/e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 9
24 | DATASETS:
25 |   TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",)
26 |   TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",)
27 | DATALOADER:
28 |   SIZE_DIVISIBILITY: 32
29 | SOLVER:
30 |   BASE_LR: 0.01
31 |   WEIGHT_DECAY: 0.0001
32 |   STEPS: (18000,)
33 |   MAX_ITER: 24000
34 | 


--------------------------------------------------------------------------------
/configs/cityscapes/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 9
24 |   ROI_MASK_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
27 |     PREDICTOR: "MaskRCNNC4Predictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 28
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   MASK_ON: True
33 | DATASETS:
34 |   TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",)
35 |   TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",)
36 | DATALOADER:
37 |   SIZE_DIVISIBILITY: 32
38 | SOLVER:
39 |   BASE_LR: 0.01
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (18000,)
42 |   MAX_ITER: 24000
43 | 


--------------------------------------------------------------------------------
/configs/dcn/README.md:
--------------------------------------------------------------------------------
 1 | ### Reference 
 2 | 1 [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/pdf/1811.11168.pdf)  
 3 | 2 third-party: [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn)  
 4 | 
 5 | ### Performance
 6 | |      case                   | bbox AP | mask AP |
 7 | |----------------------------:|--------:|:-------:|
 8 | | R-50-FPN-dcn (implement)    |  39.8   |  -      |
 9 | | R-50-FPN-dcn (mmdetection)  |  40.0   |  -      |
10 | | R-50-FPN-mdcn (implement)   |  40.0   |  -      |
11 | | R-50-FPN-mdcn (mmdetection) |  40.3   |  -      |
12 | | R-50-FPN-dcn (implement)    |  40.8   |  36.8   |
13 | | R-50-FPN-dcn (mmdetection)  |  41.1   |  37.2   |
14 | | R-50-FPN-mdcn (implement)   |  40.7   |  36.7   |
15 | | R-50-FPN-mdcn (mmdetection) |  41.4   |  37.4   |
16 | 
17 | 
18 | ### Note
19 | see [dcn-v2](https://github.com/open-mmlab/mmdetection/blob/master/MODEL_ZOO.md#deformable-convolution-v2) in `mmdetection` for more details.  
20 | 
21 | 
22 | ### Usage
23 | Add these three lines under `MODEL.RESNETS` in your config:
24 | ```
25 | MODEL:
26 |   RESNETS:
27 |     # corresponding to C2,C3,C4,C5
28 |     STAGE_WITH_DCN: (False, True, True, True)
29 |     WITH_MODULATED_DCN: True
30 |     DEFORMABLE_GROUPS: 1
31 | ```


--------------------------------------------------------------------------------
/configs/dcn/e2e_faster_rcnn_dconv_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS:
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STAGE_WITH_DCN: (False, True, True, True)
14 |     WITH_MODULATED_DCN: False
15 |     DEFORMABLE_GROUPS: 1
16 |   RPN:
17 |     USE_FPN: True
18 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
19 |     PRE_NMS_TOP_N_TRAIN: 2000
20 |     PRE_NMS_TOP_N_TEST: 1000
21 |     POST_NMS_TOP_N_TEST: 1000
22 |     FPN_POST_NMS_TOP_N_TEST: 1000
23 |   ROI_HEADS:
24 |     USE_FPN: True
25 |   ROI_BOX_HEAD:
26 |     POOLER_RESOLUTION: 7
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     POOLER_SAMPLING_RATIO: 2
29 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
30 |     PREDICTOR: "FPNPredictor"
31 | DATASETS:
32 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
33 |   TEST: ("coco_2014_minival",)
34 | DATALOADER:
35 |   SIZE_DIVISIBILITY: 32
36 | SOLVER:
37 |   # Assume 8 gpus
38 |   BASE_LR: 0.02
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (60000, 80000)
41 |   MAX_ITER: 90000
42 |   IMS_PER_BATCH: 16
43 | TEST:
44 |   IMS_PER_BATCH: 8
45 | 


--------------------------------------------------------------------------------
/configs/dcn/e2e_faster_rcnn_mdconv_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS:
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STAGE_WITH_DCN: (False, True, True, True)
14 |     WITH_MODULATED_DCN: True
15 |     DEFORMABLE_GROUPS: 1
16 |   RPN:
17 |     USE_FPN: True
18 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
19 |     PRE_NMS_TOP_N_TRAIN: 2000
20 |     PRE_NMS_TOP_N_TEST: 1000
21 |     POST_NMS_TOP_N_TEST: 1000
22 |     FPN_POST_NMS_TOP_N_TEST: 1000
23 |   ROI_HEADS:
24 |     USE_FPN: True
25 |   ROI_BOX_HEAD:
26 |     POOLER_RESOLUTION: 7
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     POOLER_SAMPLING_RATIO: 2
29 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
30 |     PREDICTOR: "FPNPredictor"
31 | DATASETS:
32 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
33 |   TEST: ("coco_2014_minival",)
34 | DATALOADER:
35 |   SIZE_DIVISIBILITY: 32
36 | SOLVER:
37 |   # Assume 8 gpus
38 |   BASE_LR: 0.02
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (60000, 80000)
41 |   MAX_ITER: 90000
42 |   IMS_PER_BATCH: 16
43 | TEST:
44 |   IMS_PER_BATCH: 8
45 | 


--------------------------------------------------------------------------------
/configs/dcn/e2e_mask_rcnn_dconv_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS:
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STAGE_WITH_DCN: (False, True, True, True)
14 |     WITH_MODULATED_DCN: False
15 |     DEFORMABLE_GROUPS: 1
16 |   RPN:
17 |     USE_FPN: True
18 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
19 |     PRE_NMS_TOP_N_TRAIN: 2000
20 |     PRE_NMS_TOP_N_TEST: 1000
21 |     POST_NMS_TOP_N_TEST: 1000
22 |     FPN_POST_NMS_TOP_N_TEST: 1000
23 |   ROI_HEADS:
24 |     USE_FPN: True
25 |   ROI_BOX_HEAD:
26 |     POOLER_RESOLUTION: 7
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     POOLER_SAMPLING_RATIO: 2
29 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
30 |     PREDICTOR: "FPNPredictor"
31 |   ROI_MASK_HEAD:
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
34 |     PREDICTOR: "MaskRCNNC4Predictor"
35 |     POOLER_RESOLUTION: 14
36 |     POOLER_SAMPLING_RATIO: 2
37 |     RESOLUTION: 28
38 |     SHARE_BOX_FEATURE_EXTRACTOR: False
39 |   MASK_ON: True
40 | DATASETS:
41 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
42 |   TEST: ("coco_2014_minival",)
43 | DATALOADER:
44 |   SIZE_DIVISIBILITY: 32
45 | SOLVER:
46 |   # Assume 8 gpus
47 |   BASE_LR: 0.02
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (60000, 80000)
50 |   MAX_ITER: 90000
51 |   IMS_PER_BATCH: 16
52 | TEST:
53 |   IMS_PER_BATCH: 8
54 | 
55 | 


--------------------------------------------------------------------------------
/configs/dcn/e2e_mask_rcnn_mdconv_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS:
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STAGE_WITH_DCN: (False, True, True, True)
14 |     WITH_MODULATED_DCN: True
15 |     DEFORMABLE_GROUPS: 1
16 |   RPN:
17 |     USE_FPN: True
18 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
19 |     PRE_NMS_TOP_N_TRAIN: 2000
20 |     PRE_NMS_TOP_N_TEST: 1000
21 |     POST_NMS_TOP_N_TEST: 1000
22 |     FPN_POST_NMS_TOP_N_TEST: 1000
23 |   ROI_HEADS:
24 |     USE_FPN: True
25 |   ROI_BOX_HEAD:
26 |     POOLER_RESOLUTION: 7
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     POOLER_SAMPLING_RATIO: 2
29 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
30 |     PREDICTOR: "FPNPredictor"
31 |   ROI_MASK_HEAD:
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
34 |     PREDICTOR: "MaskRCNNC4Predictor"
35 |     POOLER_RESOLUTION: 14
36 |     POOLER_SAMPLING_RATIO: 2
37 |     RESOLUTION: 28
38 |     SHARE_BOX_FEATURE_EXTRACTOR: False
39 |   MASK_ON: True
40 | DATASETS:
41 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
42 |   TEST: ("coco_2014_minival",)
43 | DATALOADER:
44 |   SIZE_DIVISIBILITY: 32
45 | SOLVER:
46 |   # Assume 8 gpus
47 |   BASE_LR: 0.02
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (60000, 80000)
50 |   MAX_ITER: 90000
51 |   IMS_PER_BATCH: 16
52 | TEST:
53 |   IMS_PER_BATCH: 8
54 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
25 |   TEST: ("coco_2014_minival",)
26 | DATALOADER:
27 |   SIZE_DIVISIBILITY: 32
28 | SOLVER:
29 |   BASE_LR: 0.02
30 |   WEIGHT_DECAY: 0.0001
31 |   STEPS: (60000, 80000)
32 |   MAX_ITER: 90000
33 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 | DATASETS:
 8 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
 9 |   TEST: ("coco_2014_minival",)
10 | SOLVER:
11 |   BASE_LR: 0.01
12 |   WEIGHT_DECAY: 0.0001
13 |   STEPS: (120000, 160000)
14 |   MAX_ITER: 180000
15 |   IMS_PER_BATCH: 8
16 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
25 |   TEST: ("coco_2014_minival",)
26 | DATALOADER:
27 |   SIZE_DIVISIBILITY: 32
28 | SOLVER:
29 |   BASE_LR: 0.02
30 |   WEIGHT_DECAY: 0.0001
31 |   STEPS: (60000, 80000)
32 |   MAX_ITER: 90000
33 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RPN:
 7 |     USE_FPN: True
 8 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
 9 |     PRE_NMS_TOP_N_TRAIN: 2000
10 |     PRE_NMS_TOP_N_TEST: 1000
11 |     POST_NMS_TOP_N_TEST: 1000
12 |     FPN_POST_NMS_TOP_N_TEST: 1000
13 |   ROI_HEADS:
14 |     USE_FPN: True
15 |   ROI_BOX_HEAD:
16 |     POOLER_RESOLUTION: 7
17 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
18 |     POOLER_SAMPLING_RATIO: 2
19 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
20 |     PREDICTOR: "FPNPredictor"
21 |   RESNETS:
22 |     BACKBONE_OUT_CHANNELS: 256
23 |     STRIDE_IN_1X1: False
24 |     NUM_GROUPS: 32
25 |     WIDTH_PER_GROUP: 8
26 | DATASETS:
27 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 |   TEST: ("coco_2014_minival",)
29 | DATALOADER:
30 |   SIZE_DIVISIBILITY: 32
31 | SOLVER:
32 |   BASE_LR: 0.01
33 |   WEIGHT_DECAY: 0.0001
34 |   STEPS: (120000, 160000)
35 |   MAX_ITER: 180000
36 |   IMS_PER_BATCH: 8
37 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_fbnet.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "default"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |   RPN:
12 |     ANCHOR_SIZES: (16, 32, 64, 128, 256)
13 |     ANCHOR_STRIDE: (16, )
14 |     BATCH_SIZE_PER_IMAGE: 256
15 |     PRE_NMS_TOP_N_TRAIN: 6000
16 |     PRE_NMS_TOP_N_TEST: 6000
17 |     POST_NMS_TOP_N_TRAIN: 2000
18 |     POST_NMS_TOP_N_TEST: 100
19 |     RPN_HEAD: FBNet.rpn_head
20 |   ROI_HEADS:
21 |     BATCH_SIZE_PER_IMAGE: 512
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 6
24 |     FEATURE_EXTRACTOR: FBNet.roi_head
25 |     NUM_CLASSES: 81
26 | DATASETS:
27 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 |   TEST: ("coco_2014_minival",)
29 | SOLVER:
30 |   BASE_LR: 0.06
31 |   WARMUP_FACTOR: 0.1
32 |   WEIGHT_DECAY: 0.0001
33 |   STEPS: (60000, 80000)
34 |   MAX_ITER: 90000
35 |   IMS_PER_BATCH: 128  # for 8GPUs
36 | # TEST:
37 | #   IMS_PER_BATCH: 8
38 | INPUT:
39 |   MIN_SIZE_TRAIN: (320, )
40 |   MAX_SIZE_TRAIN: 640
41 |   MIN_SIZE_TEST: 320
42 |   MAX_SIZE_TEST: 640
43 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
44 |   PIXEL_STD: [57.375, 57.12, 58.395]
45 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_fbnet_600.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "default"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |   RPN:
12 |     ANCHOR_SIZES: (32, 64, 128, 256, 512)
13 |     ANCHOR_STRIDE: (16, )
14 |     BATCH_SIZE_PER_IMAGE: 256
15 |     PRE_NMS_TOP_N_TRAIN: 6000
16 |     PRE_NMS_TOP_N_TEST: 6000
17 |     POST_NMS_TOP_N_TRAIN: 2000
18 |     POST_NMS_TOP_N_TEST: 200
19 |     RPN_HEAD: FBNet.rpn_head
20 |   ROI_HEADS:
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 6
24 |     FEATURE_EXTRACTOR: FBNet.roi_head
25 |     NUM_CLASSES: 81
26 | DATASETS:
27 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 |   TEST: ("coco_2014_minival",)
29 | SOLVER:
30 |   BASE_LR: 0.06
31 |   WARMUP_FACTOR: 0.1
32 |   WEIGHT_DECAY: 0.0001
33 |   STEPS: (60000, 80000)
34 |   MAX_ITER: 90000
35 |   IMS_PER_BATCH: 128  # for 8GPUs
36 | # TEST:
37 | #   IMS_PER_BATCH: 8
38 | INPUT:
39 |   MIN_SIZE_TRAIN: (600, )
40 |   MAX_SIZE_TRAIN: 1000
41 |   MIN_SIZE_TEST: 600
42 |   MAX_SIZE_TEST: 1000
43 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
44 |   PIXEL_STD: [57.375, 57.12, 58.395]
45 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "cham_v1a"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |   RPN:
12 |     ANCHOR_SIZES: (32, 64, 128, 256, 512)
13 |     ANCHOR_STRIDE: (16, )
14 |     BATCH_SIZE_PER_IMAGE: 256
15 |     PRE_NMS_TOP_N_TRAIN: 6000
16 |     PRE_NMS_TOP_N_TEST: 6000
17 |     POST_NMS_TOP_N_TRAIN: 2000
18 |     POST_NMS_TOP_N_TEST: 200
19 |     RPN_HEAD: FBNet.rpn_head
20 |   ROI_HEADS:
21 |     BATCH_SIZE_PER_IMAGE: 128
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 6
24 |     FEATURE_EXTRACTOR: FBNet.roi_head
25 |     NUM_CLASSES: 81
26 | DATASETS:
27 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 |   TEST: ("coco_2014_minival",)
29 | SOLVER:
30 |   BASE_LR: 0.045
31 |   WARMUP_FACTOR: 0.1
32 |   WEIGHT_DECAY: 0.0001
33 |   STEPS: (90000, 120000)
34 |   MAX_ITER: 135000
35 |   IMS_PER_BATCH: 96  # for 8GPUs
36 | # TEST:
37 | #   IMS_PER_BATCH: 8
38 | INPUT:
39 |   MIN_SIZE_TRAIN: (600, )
40 |   MAX_SIZE_TRAIN: 1000
41 |   MIN_SIZE_TEST: 600
42 |   MAX_SIZE_TEST: 1000
43 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
44 |   PIXEL_STD: [57.375, 57.12, 58.395]
45 | 


--------------------------------------------------------------------------------
/configs/e2e_keypoint_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 2
24 |   ROI_KEYPOINT_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
27 |     PREDICTOR: "KeypointRCNNPredictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 56
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   KEYPOINT_ON: True
33 | DATASETS:
34 |   TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",)
35 |   TEST: ("keypoints_coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
38 | DATALOADER:
39 |   SIZE_DIVISIBILITY: 32
40 | SOLVER:
41 |   BASE_LR: 0.02
42 |   WEIGHT_DECAY: 0.0001
43 |   STEPS: (60000, 80000)
44 |   MAX_ITER: 90000
45 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |   ROI_MASK_HEAD:
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 |     PREDICTOR: "MaskRCNNC4Predictor"
27 |     POOLER_RESOLUTION: 14
28 |     POOLER_SAMPLING_RATIO: 2
29 |     RESOLUTION: 28
30 |     SHARE_BOX_FEATURE_EXTRACTOR: False
31 |   MASK_ON: True
32 | DATASETS:
33 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
34 |   TEST: ("coco_2014_minival",)
35 | DATALOADER:
36 |   SIZE_DIVISIBILITY: 32
37 | SOLVER:
38 |   BASE_LR: 0.02
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (60000, 80000)
41 |   MAX_ITER: 90000
42 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 |   ROI_MASK_HEAD:
 8 |     PREDICTOR: "MaskRCNNC4Predictor"
 9 |     SHARE_BOX_FEATURE_EXTRACTOR: True
10 |   MASK_ON: True
11 | DATASETS:
12 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
13 |   TEST: ("coco_2014_minival",)
14 | SOLVER:
15 |   BASE_LR: 0.01
16 |   WEIGHT_DECAY: 0.0001
17 |   STEPS: (120000, 160000)
18 |   MAX_ITER: 180000
19 |   IMS_PER_BATCH: 8
20 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |   ROI_MASK_HEAD:
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 |     PREDICTOR: "MaskRCNNC4Predictor"
27 |     POOLER_RESOLUTION: 14
28 |     POOLER_SAMPLING_RATIO: 2
29 |     RESOLUTION: 28
30 |     SHARE_BOX_FEATURE_EXTRACTOR: False
31 |   MASK_ON: True
32 | DATASETS:
33 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
34 |   TEST: ("coco_2014_minival",)
35 | DATALOADER:
36 |   SIZE_DIVISIBILITY: 32
37 | SOLVER:
38 |   BASE_LR: 0.02
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (60000, 80000)
41 |   MAX_ITER: 90000
42 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |   ROI_BOX_HEAD:
21 |     POOLER_RESOLUTION: 7
22 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 |     POOLER_SAMPLING_RATIO: 2
24 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 |     PREDICTOR: "FPNPredictor"
26 |   ROI_MASK_HEAD:
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
29 |     PREDICTOR: "MaskRCNNC4Predictor"
30 |     POOLER_RESOLUTION: 14
31 |     POOLER_SAMPLING_RATIO: 2
32 |     RESOLUTION: 28
33 |     SHARE_BOX_FEATURE_EXTRACTOR: False
34 |   MASK_ON: True
35 | DATASETS:
36 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
37 |   TEST: ("coco_2014_minival",)
38 | DATALOADER:
39 |   SIZE_DIVISIBILITY: 32
40 | SOLVER:
41 |   BASE_LR: 0.01
42 |   WEIGHT_DECAY: 0.0001
43 |   STEPS: (120000, 160000)
44 |   MAX_ITER: 180000
45 |   IMS_PER_BATCH: 8
46 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_fbnet.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "default"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |     DET_HEAD_LAST_SCALE: 0.0
12 |   RPN:
13 |     ANCHOR_SIZES: (16, 32, 64, 128, 256)
14 |     ANCHOR_STRIDE: (16, )
15 |     BATCH_SIZE_PER_IMAGE: 256
16 |     PRE_NMS_TOP_N_TRAIN: 6000
17 |     PRE_NMS_TOP_N_TEST: 6000
18 |     POST_NMS_TOP_N_TRAIN: 2000
19 |     POST_NMS_TOP_N_TEST: 100
20 |     RPN_HEAD: FBNet.rpn_head
21 |   ROI_HEADS:
22 |     BATCH_SIZE_PER_IMAGE: 256
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 6
25 |     FEATURE_EXTRACTOR: FBNet.roi_head
26 |     NUM_CLASSES: 81
27 |   ROI_MASK_HEAD:
28 |     POOLER_RESOLUTION: 6
29 |     FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 |     PREDICTOR: "MaskRCNNConv1x1Predictor"
31 |     RESOLUTION: 12
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   MASK_ON: True
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | SOLVER:
38 |   BASE_LR: 0.06
39 |   WARMUP_FACTOR: 0.1
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (60000, 80000)
42 |   MAX_ITER: 90000
43 |   IMS_PER_BATCH: 128  # for 8GPUs
44 | # TEST:
45 | #   IMS_PER_BATCH: 8
46 | INPUT:
47 |   MIN_SIZE_TRAIN: (320, )
48 |   MAX_SIZE_TRAIN: 640
49 |   MIN_SIZE_TEST: 320
50 |   MAX_SIZE_TEST: 640
51 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
52 |   PIXEL_STD: [57.375, 57.12, 58.395]
53 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_fbnet_600.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "default"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |     DET_HEAD_LAST_SCALE: 0.0
12 |   RPN:
13 |     ANCHOR_SIZES: (32, 64, 128, 256, 512)
14 |     ANCHOR_STRIDE: (16, )
15 |     BATCH_SIZE_PER_IMAGE: 256
16 |     PRE_NMS_TOP_N_TRAIN: 6000
17 |     PRE_NMS_TOP_N_TEST: 6000
18 |     POST_NMS_TOP_N_TRAIN: 2000
19 |     POST_NMS_TOP_N_TEST: 200
20 |     RPN_HEAD: FBNet.rpn_head
21 |   ROI_HEADS:
22 |     BATCH_SIZE_PER_IMAGE: 256
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 6
25 |     FEATURE_EXTRACTOR: FBNet.roi_head
26 |     NUM_CLASSES: 81
27 |   ROI_MASK_HEAD:
28 |     POOLER_RESOLUTION: 6
29 |     FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 |     PREDICTOR: "MaskRCNNConv1x1Predictor"
31 |     RESOLUTION: 12
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   MASK_ON: True
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | SOLVER:
38 |   BASE_LR: 0.06
39 |   WARMUP_FACTOR: 0.1
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (60000, 80000)
42 |   MAX_ITER: 90000
43 |   IMS_PER_BATCH: 128  # for 8GPUs
44 | # TEST:
45 | #   IMS_PER_BATCH: 8
46 | INPUT:
47 |   MIN_SIZE_TRAIN: (600, )
48 |   MAX_SIZE_TRAIN: 1000
49 |   MIN_SIZE_TEST: 600
50 |   MAX_SIZE_TEST: 1000
51 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
52 |   PIXEL_STD: [57.375, 57.12, 58.395]
53 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "xirb16d_dsmask"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |     DET_HEAD_LAST_SCALE: -1.0
12 |   RPN:
13 |     ANCHOR_SIZES: (16, 32, 64, 128, 256)
14 |     ANCHOR_STRIDE: (16, )
15 |     BATCH_SIZE_PER_IMAGE: 256
16 |     PRE_NMS_TOP_N_TRAIN: 6000
17 |     PRE_NMS_TOP_N_TEST: 6000
18 |     POST_NMS_TOP_N_TRAIN: 2000
19 |     POST_NMS_TOP_N_TEST: 100
20 |     RPN_HEAD: FBNet.rpn_head
21 |   ROI_HEADS:
22 |     BATCH_SIZE_PER_IMAGE: 512
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 6
25 |     FEATURE_EXTRACTOR: FBNet.roi_head
26 |     NUM_CLASSES: 81
27 |   ROI_MASK_HEAD:
28 |     POOLER_RESOLUTION: 6
29 |     FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 |     PREDICTOR: "MaskRCNNConv1x1Predictor"
31 |     RESOLUTION: 12
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   MASK_ON: True
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | SOLVER:
38 |   BASE_LR: 0.06
39 |   WARMUP_FACTOR: 0.1
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (60000, 80000)
42 |   MAX_ITER: 90000
43 |   IMS_PER_BATCH: 128  # for 8GPUs
44 | # TEST:
45 | #   IMS_PER_BATCH: 8
46 | INPUT:
47 |   MIN_SIZE_TRAIN: (320, )
48 |   MAX_SIZE_TRAIN: 640
49 |   MIN_SIZE_TEST: 320
50 |   MAX_SIZE_TEST: 640
51 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
52 |   PIXEL_STD: [57.375, 57.12, 58.395]
53 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "xirb16d_dsmask"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |     DET_HEAD_LAST_SCALE: 0.0
12 |   RPN:
13 |     ANCHOR_SIZES: (32, 64, 128, 256, 512)
14 |     ANCHOR_STRIDE: (16, )
15 |     BATCH_SIZE_PER_IMAGE: 256
16 |     PRE_NMS_TOP_N_TRAIN: 6000
17 |     PRE_NMS_TOP_N_TEST: 6000
18 |     POST_NMS_TOP_N_TRAIN: 2000
19 |     POST_NMS_TOP_N_TEST: 200
20 |     RPN_HEAD: FBNet.rpn_head
21 |   ROI_HEADS:
22 |     BATCH_SIZE_PER_IMAGE: 256
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 6
25 |     FEATURE_EXTRACTOR: FBNet.roi_head
26 |     NUM_CLASSES: 81
27 |   ROI_MASK_HEAD:
28 |     POOLER_RESOLUTION: 6
29 |     FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 |     PREDICTOR: "MaskRCNNConv1x1Predictor"
31 |     RESOLUTION: 12
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   MASK_ON: True
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | SOLVER:
38 |   BASE_LR: 0.06
39 |   WARMUP_FACTOR: 0.1
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (60000, 80000)
42 |   MAX_ITER: 90000
43 |   IMS_PER_BATCH: 128  # for 8GPUs
44 | # TEST:
45 | #   IMS_PER_BATCH: 8
46 | INPUT:
47 |   MIN_SIZE_TRAIN: (600, )
48 |   MAX_SIZE_TRAIN: 1000
49 |   MIN_SIZE_TEST: 600
50 |   MAX_SIZE_TEST: 1000
51 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
52 |   PIXEL_STD: [57.375, 57.12, 58.395]
53 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/README.md:
--------------------------------------------------------------------------------
 1 | ### Group Normalization
 2 | 1. [Group Normalization](https://arxiv.org/abs/1803.08494)
 3 | 2. [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883)
 4 | 3. [Official code](https://github.com/facebookresearch/Detectron/blob/master/projects/GN/README.md)
 5 | 
 6 | 
 7 | ### Performance
 8 | |      case                  |    Type      |  lr schd  |  im/gpu | bbox AP | mask AP |
 9 | |----------------------------|:------------:|:---------:|:-------:|:-------:|:-------:|
10 | |   R-50-FPN, GN (paper)     | finetune     |    2x     |   2     |   40.3  |  35.7   |
11 | |   R-50-FPN, GN (implement) | finetune     |    2x     |   2     |   40.2  |  36.0   |
12 | |   R-50-FPN, GN (paper)     | from scratch |    3x     |   2     |   39.5  |  35.2   |
13 | |   R-50-FPN, GN (implement) | from scratch |    3x     |   2     |   38.9  |  35.1   |
14 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS: # use GN for backbone
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STRIDE_IN_1X1: False
14 |     TRANS_FUNC: "BottleneckWithGN"
15 |     STEM_FUNC: "StemWithGN"
16 |   FPN:
17 |     USE_GN: True # use GN for FPN
18 |   RPN:
19 |     USE_FPN: True
20 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 |     PRE_NMS_TOP_N_TRAIN: 2000
22 |     PRE_NMS_TOP_N_TEST: 1000
23 |     POST_NMS_TOP_N_TEST: 1000
24 |     FPN_POST_NMS_TOP_N_TEST: 1000
25 |   ROI_HEADS:
26 |     USE_FPN: True
27 |     BATCH_SIZE_PER_IMAGE: 512
28 |     POSITIVE_FRACTION: 0.25
29 |   ROI_BOX_HEAD:
30 |     USE_GN: True # use GN for bbox head
31 |     POOLER_RESOLUTION: 7
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     POOLER_SAMPLING_RATIO: 2
34 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
35 |     PREDICTOR: "FPNPredictor"
36 | DATASETS:
37 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
38 |   TEST: ("coco_2014_minival",)
39 | DATALOADER:
40 |   SIZE_DIVISIBILITY: 32
41 | SOLVER:
42 |   # Assume 8 gpus
43 |   BASE_LR: 0.02
44 |   WEIGHT_DECAY: 0.0001
45 |   STEPS: (60000, 80000)
46 |   MAX_ITER: 90000
47 |   IMS_PER_BATCH: 16
48 | TEST:
49 |   IMS_PER_BATCH: 8
50 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS: # use GN for backbone
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STRIDE_IN_1X1: False
14 |     TRANS_FUNC: "BottleneckWithGN"
15 |     STEM_FUNC: "StemWithGN"
16 |   FPN:
17 |     USE_GN: True # use GN for FPN
18 |   RPN:
19 |     USE_FPN: True
20 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 |     PRE_NMS_TOP_N_TRAIN: 2000
22 |     PRE_NMS_TOP_N_TEST: 1000
23 |     POST_NMS_TOP_N_TEST: 1000
24 |     FPN_POST_NMS_TOP_N_TEST: 1000
25 |   ROI_HEADS:
26 |     USE_FPN: True
27 |     BATCH_SIZE_PER_IMAGE: 512
28 |     POSITIVE_FRACTION: 0.25
29 |   ROI_BOX_HEAD:
30 |     USE_GN: True # use GN for bbox head
31 |     POOLER_RESOLUTION: 7
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     POOLER_SAMPLING_RATIO: 2
34 |     CONV_HEAD_DIM: 256
35 |     NUM_STACKED_CONVS: 4
36 |     FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
37 |     PREDICTOR: "FPNPredictor"
38 | DATASETS:
39 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
40 |   TEST: ("coco_2014_minival",)
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assume 8 gpus
45 |   BASE_LR: 0.02
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (60000, 80000)
48 |   MAX_ITER: 90000
49 |   IMS_PER_BATCH: 16
50 | TEST:
51 |   IMS_PER_BATCH: 8
52 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_1x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS: # use GN for backbone
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STRIDE_IN_1X1: False
14 |     TRANS_FUNC: "BottleneckWithGN"
15 |     STEM_FUNC: "StemWithGN"
16 |   FPN:
17 |     USE_GN: True # use GN for FPN
18 |   RPN:
19 |     USE_FPN: True
20 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 |     PRE_NMS_TOP_N_TRAIN: 2000
22 |     PRE_NMS_TOP_N_TEST: 1000
23 |     POST_NMS_TOP_N_TEST: 1000
24 |     FPN_POST_NMS_TOP_N_TEST: 1000
25 |   ROI_HEADS:
26 |     USE_FPN: True
27 |     BATCH_SIZE_PER_IMAGE: 512
28 |     POSITIVE_FRACTION: 0.25
29 |   ROI_BOX_HEAD:
30 |     USE_GN: True # use GN for bbox head
31 |     POOLER_RESOLUTION: 7
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     POOLER_SAMPLING_RATIO: 2
34 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
35 |     PREDICTOR: "FPNPredictor"
36 |   ROI_MASK_HEAD:
37 |     USE_GN: True # use GN for mask head
38 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
39 |     CONV_LAYERS: (256, 256, 256, 256)
40 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
41 |     PREDICTOR: "MaskRCNNC4Predictor"
42 |     POOLER_RESOLUTION: 14
43 |     POOLER_SAMPLING_RATIO: 2
44 |     RESOLUTION: 28
45 |     SHARE_BOX_FEATURE_EXTRACTOR: False
46 |   MASK_ON: True
47 | DATASETS:
48 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
49 |   TEST: ("coco_2014_minival",)
50 | DATALOADER:
51 |   SIZE_DIVISIBILITY: 32
52 | SOLVER:
53 |   # Assume 8 gpus
54 |   BASE_LR: 0.02
55 |   WEIGHT_DECAY: 0.0001
56 |   STEPS: (60000, 80000)
57 |   MAX_ITER: 90000
58 |   IMS_PER_BATCH: 16
59 | TEST:
60 |   IMS_PER_BATCH: 8
61 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS: # use GN for backbone
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STRIDE_IN_1X1: False
14 |     TRANS_FUNC: "BottleneckWithGN"
15 |     STEM_FUNC: "StemWithGN"
16 |   FPN:
17 |     USE_GN: True # use GN for FPN
18 |   RPN:
19 |     USE_FPN: True
20 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 |     PRE_NMS_TOP_N_TRAIN: 2000
22 |     PRE_NMS_TOP_N_TEST: 1000
23 |     POST_NMS_TOP_N_TEST: 1000
24 |     FPN_POST_NMS_TOP_N_TEST: 1000
25 |   ROI_HEADS:
26 |     USE_FPN: True
27 |     BATCH_SIZE_PER_IMAGE: 512
28 |     POSITIVE_FRACTION: 0.25
29 |   ROI_BOX_HEAD:
30 |     USE_GN: True # use GN for bbox head
31 |     POOLER_RESOLUTION: 7
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     POOLER_SAMPLING_RATIO: 2
34 |     CONV_HEAD_DIM: 256
35 |     NUM_STACKED_CONVS: 4
36 |     FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
37 |     PREDICTOR: "FPNPredictor"
38 |   ROI_MASK_HEAD:
39 |     USE_GN: True # use GN for mask head
40 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
41 |     CONV_LAYERS: (256, 256, 256, 256)
42 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
43 |     PREDICTOR: "MaskRCNNC4Predictor"
44 |     POOLER_RESOLUTION: 14
45 |     POOLER_SAMPLING_RATIO: 2
46 |     RESOLUTION: 28
47 |     SHARE_BOX_FEATURE_EXTRACTOR: False
48 |   MASK_ON: True
49 | DATASETS:
50 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
51 |   TEST: ("coco_2014_minival",)
52 | DATALOADER:
53 |   SIZE_DIVISIBILITY: 32
54 | SOLVER:
55 |   # Assume 8 gpus
56 |   BASE_LR: 0.02
57 |   WEIGHT_DECAY: 0.0001
58 |   STEPS: (60000, 80000)
59 |   MAX_ITER: 90000
60 |   IMS_PER_BATCH: 16
61 | TEST:
62 |   IMS_PER_BATCH: 8
63 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "" # no pretrained model
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |     FREEZE_CONV_BODY_AT: 0 # train all layers (no pretrained weights to fine-tune)
12 |   RESNETS: # use GN for backbone
13 |     BACKBONE_OUT_CHANNELS: 256
14 |     STRIDE_IN_1X1: False
15 |     TRANS_FUNC: "BottleneckWithGN"
16 |     STEM_FUNC: "StemWithGN"
17 |   FPN:
18 |     USE_GN: True # use GN for FPN
19 |   RPN:
20 |     USE_FPN: True
21 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 |     PRE_NMS_TOP_N_TRAIN: 2000
23 |     PRE_NMS_TOP_N_TEST: 1000
24 |     POST_NMS_TOP_N_TEST: 1000
25 |     FPN_POST_NMS_TOP_N_TEST: 1000
26 |   ROI_HEADS:
27 |     USE_FPN: True
28 |     BATCH_SIZE_PER_IMAGE: 512
29 |     POSITIVE_FRACTION: 0.25
30 |   ROI_BOX_HEAD:
31 |     USE_GN: True # use GN for bbox head
32 |     POOLER_RESOLUTION: 7
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     POOLER_SAMPLING_RATIO: 2
35 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
36 |     PREDICTOR: "FPNPredictor"
37 | DATASETS:
38 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
39 |   TEST: ("coco_2014_minival",)
40 | DATALOADER:
41 |   SIZE_DIVISIBILITY: 32
42 | SOLVER:
43 |   # Assume 8 gpus
44 |   BASE_LR: 0.02
45 |   WEIGHT_DECAY: 0.0001
46 |   STEPS: (210000, 250000)
47 |   MAX_ITER: 270000
48 |   IMS_PER_BATCH: 16
49 | TEST:
50 |   IMS_PER_BATCH: 8
51 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "" # no pretrained model
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |     FREEZE_CONV_BODY_AT: 0 # train all layers (no pretrained weights to fine-tune)
12 |   RESNETS: # use GN for backbone
13 |     BACKBONE_OUT_CHANNELS: 256
14 |     STRIDE_IN_1X1: False
15 |     TRANS_FUNC: "BottleneckWithGN"
16 |     STEM_FUNC: "StemWithGN"
17 |   FPN:
18 |     USE_GN: True # use GN for FPN
19 |   RPN:
20 |     USE_FPN: True
21 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 |     PRE_NMS_TOP_N_TRAIN: 2000
23 |     PRE_NMS_TOP_N_TEST: 1000
24 |     POST_NMS_TOP_N_TEST: 1000
25 |     FPN_POST_NMS_TOP_N_TEST: 1000
26 |   ROI_HEADS:
27 |     USE_FPN: True
28 |     BATCH_SIZE_PER_IMAGE: 512
29 |     POSITIVE_FRACTION: 0.25
30 |   ROI_BOX_HEAD:
31 |     USE_GN: True # use GN for bbox head
32 |     POOLER_RESOLUTION: 7
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     POOLER_SAMPLING_RATIO: 2
35 |     CONV_HEAD_DIM: 256
36 |     NUM_STACKED_CONVS: 4
37 |     FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
38 |     PREDICTOR: "FPNPredictor"
39 | DATASETS:
40 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
41 |   TEST: ("coco_2014_minival",)
42 | DATALOADER:
43 |   SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 |   # Assume 8 gpus
46 |   BASE_LR: 0.02
47 |   WEIGHT_DECAY: 0.0001
48 |   STEPS: (210000, 250000)
49 |   MAX_ITER: 270000
50 |   IMS_PER_BATCH: 16
51 | TEST:
52 |   IMS_PER_BATCH: 8
53 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "" # no pretrained model
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |     FREEZE_CONV_BODY_AT: 0 # train all layers (no pretrained weights to fine-tune)
12 |   RESNETS: # use GN for backbone
13 |     BACKBONE_OUT_CHANNELS: 256
14 |     STRIDE_IN_1X1: False
15 |     TRANS_FUNC: "BottleneckWithGN"
16 |     STEM_FUNC: "StemWithGN"
17 |   FPN:
18 |     USE_GN: True # use GN for FPN
19 |   RPN:
20 |     USE_FPN: True
21 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 |     PRE_NMS_TOP_N_TRAIN: 2000
23 |     PRE_NMS_TOP_N_TEST: 1000
24 |     POST_NMS_TOP_N_TEST: 1000
25 |     FPN_POST_NMS_TOP_N_TEST: 1000
26 |   ROI_HEADS:
27 |     USE_FPN: True
28 |     BATCH_SIZE_PER_IMAGE: 512
29 |     POSITIVE_FRACTION: 0.25
30 |   ROI_BOX_HEAD:
31 |     USE_GN: True # use GN for bbox head
32 |     POOLER_RESOLUTION: 7
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     POOLER_SAMPLING_RATIO: 2
35 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
36 |     PREDICTOR: "FPNPredictor"
37 |   ROI_MASK_HEAD:
38 |     USE_GN: True # use GN for mask head
39 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
40 |     CONV_LAYERS: (256, 256, 256, 256)
41 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
42 |     PREDICTOR: "MaskRCNNC4Predictor"
43 |     POOLER_RESOLUTION: 14
44 |     POOLER_SAMPLING_RATIO: 2
45 |     RESOLUTION: 28
46 |     SHARE_BOX_FEATURE_EXTRACTOR: False
47 |   MASK_ON: True
48 | DATASETS:
49 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
50 |   TEST: ("coco_2014_minival",)
51 | DATALOADER:
52 |   SIZE_DIVISIBILITY: 32
53 | SOLVER:
54 |   # Assume 8 gpus
55 |   BASE_LR: 0.02
56 |   WEIGHT_DECAY: 0.0001
57 |   STEPS: (210000, 250000)
58 |   MAX_ITER: 270000
59 |   IMS_PER_BATCH: 16
60 | TEST:
61 |   IMS_PER_BATCH: 8
62 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "" # no pretrained model
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |     FREEZE_CONV_BODY_AT: 0 # train all layers (no pretrained weights to fine-tune)
12 |   RESNETS: # use GN for backbone
13 |     BACKBONE_OUT_CHANNELS: 256
14 |     STRIDE_IN_1X1: False
15 |     TRANS_FUNC: "BottleneckWithGN"
16 |     STEM_FUNC: "StemWithGN"
17 |   FPN:
18 |     USE_GN: True # use GN for FPN
19 |   RPN:
20 |     USE_FPN: True
21 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 |     PRE_NMS_TOP_N_TRAIN: 2000
23 |     PRE_NMS_TOP_N_TEST: 1000
24 |     POST_NMS_TOP_N_TEST: 1000
25 |     FPN_POST_NMS_TOP_N_TEST: 1000
26 |   ROI_HEADS:
27 |     USE_FPN: True
28 |     BATCH_SIZE_PER_IMAGE: 512
29 |     POSITIVE_FRACTION: 0.25
30 |   ROI_BOX_HEAD:
31 |     USE_GN: True # use GN for bbox head
32 |     POOLER_RESOLUTION: 7
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     POOLER_SAMPLING_RATIO: 2
35 |     CONV_HEAD_DIM: 256
36 |     NUM_STACKED_CONVS: 4
37 |     FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
38 |     PREDICTOR: "FPNPredictor"
39 |   ROI_MASK_HEAD:
40 |     USE_GN: True # use GN for mask head
41 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
42 |     CONV_LAYERS: (256, 256, 256, 256)
43 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
44 |     PREDICTOR: "MaskRCNNC4Predictor"
45 |     POOLER_RESOLUTION: 14
46 |     POOLER_SAMPLING_RATIO: 2
47 |     RESOLUTION: 28
48 |     SHARE_BOX_FEATURE_EXTRACTOR: False
49 |   MASK_ON: True
50 | DATASETS:
51 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
52 |   TEST: ("coco_2014_minival",)
53 | DATALOADER:
54 |   SIZE_DIVISIBILITY: 32
55 | SOLVER:
56 |   # Assume 8 gpus
57 |   BASE_LR: 0.02
58 |   WEIGHT_DECAY: 0.0001
59 |   STEPS: (210000, 250000)
60 |   MAX_ITER: 270000
61 |   IMS_PER_BATCH: 16
62 | TEST:
63 |   IMS_PER_BATCH: 8
64 | 


--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 300
 7 |     ANCHOR_SIZES: (128, 256, 512)
 8 |   ROI_BOX_HEAD:
 9 |     NUM_CLASSES: 21
10 | DATASETS:
11 |   TRAIN: ("voc_2007_train", "voc_2007_val")
12 |   TEST: ("voc_2007_test",)
13 | SOLVER:
14 |   BASE_LR: 0.001
15 |   WEIGHT_DECAY: 0.0001
16 |   STEPS: (50000, )
17 |   MAX_ITER: 70000
18 |   IMS_PER_BATCH: 1
19 | TEST:
20 |   IMS_PER_BATCH: 1
21 | 


--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 300
 7 |     ANCHOR_SIZES: (128, 256, 512)
 8 |   ROI_BOX_HEAD:
 9 |     NUM_CLASSES: 21
10 | DATASETS:
11 |   TRAIN: ("voc_2007_train", "voc_2007_val")
12 |   TEST: ("voc_2007_test",)
13 | SOLVER:
14 |   BASE_LR: 0.004
15 |   WEIGHT_DECAY: 0.0001
16 |   STEPS: (12500, )
17 |   MAX_ITER: 17500
18 |   IMS_PER_BATCH: 4
19 | TEST:
20 |   IMS_PER_BATCH: 4
21 | 


--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_faster_rcnn_R_50_exp01.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 300
 7 |     ANCHOR_SIZES: (128, 256, 512)
 8 |   ROI_BOX_HEAD:
 9 |     NUM_CLASSES: 21
10 | DATASETS:
11 |   TRAIN: ("voc_2007_train", "voc_2007_val")
12 |   TEST: ("voc_2007_test",)
13 | SOLVER:
14 |   BASE_LR: 0.004
15 |   WEIGHT_DECAY: 0.0001
16 |   STEPS: (12500, )
17 |   MAX_ITER: 17500
18 |   IMS_PER_BATCH: 8
19 | TEST:
20 |   IMS_PER_BATCH: 8
21 | OUTPUT_DIR: ./exp01/  


--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_faster_rcnn_R_50_exp02_semi.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 300
 7 |     ANCHOR_SIZES: (128, 256, 512)
 8 |   ROI_BOX_HEAD:
 9 |     NUM_CLASSES: 21
10 | DATASETS:
11 |   TRAIN: ("voc_2007_train", "voc_2007_val")
12 |   TEST: ("voc_2007_test",)
13 | SOLVER:
14 |   BASE_LR: 0.004
15 |   WEIGHT_DECAY: 0.0001
16 |   STEPS: (12500, )
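17 |   MAX_ITER: 91750000 # NOTE(review): sibling VOC configs with the same STEPS use 17500 -- confirm this value is intended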
18 |   IMS_PER_BATCH: 4
19 |   CHECKPOINT_PERIOD: 10000
20 | INPUT: 
21 |   SEMI: True
22 | TEST:
23 |   IMS_PER_BATCH: 4
24 | SEMI:
25 |   EMA_DECAY: 0.99
26 |   TEMPORAL_NUM: 10
27 |   TEMPORAL_SAVE_PATH: # TODO: set a path before running; an empty value parses as YAML null
28 |   
29 | OUTPUT_DIR: # TODO: set an output directory before running; an empty value parses as YAML null
30 |   


--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 21
24 |   ROI_MASK_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
27 |     PREDICTOR: "MaskRCNNC4Predictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 28
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   MASK_ON: True
33 | DATASETS:
34 |   TRAIN: ("voc_2012_train_cocostyle",)
35 |   TEST: ("voc_2012_val_cocostyle",)
36 | DATALOADER:
37 |   SIZE_DIVISIBILITY: 32
38 | SOLVER:
39 |   BASE_LR: 0.01
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (18000,)
42 |   MAX_ITER: 24000
43 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 |   ROI_HEADS:
 8 |     BATCH_SIZE_PER_IMAGE: 256
 9 | DATASETS:
10 |   TRAIN: ("coco_2014_minival",)
11 |   TEST: ("coco_2014_minival",)
12 | INPUT:
13 |   MIN_SIZE_TRAIN: (600,)
14 |   MAX_SIZE_TRAIN: 1000
15 |   MIN_SIZE_TEST: 800
16 |   MAX_SIZE_TEST: 1000
17 | SOLVER:
18 |   BASE_LR: 0.005
19 |   WEIGHT_DECAY: 0.0001
20 |   STEPS: (1500,)
21 |   MAX_ITER: 2000
22 |   IMS_PER_BATCH: 2
23 | TEST:
24 |   IMS_PER_BATCH: 2
25 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |     BATCH_SIZE_PER_IMAGE: 256
18 |   ROI_BOX_HEAD:
19 |     POOLER_RESOLUTION: 7
20 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
21 |     POOLER_SAMPLING_RATIO: 2
22 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
23 |     PREDICTOR: "FPNPredictor"
24 | DATASETS:
25 |   TRAIN: ("coco_2014_minival",)
26 |   TEST: ("coco_2014_minival",)
27 | INPUT:
28 |   MIN_SIZE_TRAIN: (600,)
29 |   MAX_SIZE_TRAIN: 1000
30 |   MIN_SIZE_TEST: 800
31 |   MAX_SIZE_TEST: 1000
32 | DATALOADER:
33 |   SIZE_DIVISIBILITY: 32
34 | SOLVER:
35 |   BASE_LR: 0.005
36 |   WEIGHT_DECAY: 0.0001
37 |   STEPS: (1500,)
38 |   MAX_ITER: 2000
39 |   IMS_PER_BATCH: 4
40 | TEST:
41 |   IMS_PER_BATCH: 2
42 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |     BATCH_SIZE_PER_IMAGE: 256
21 |   ROI_BOX_HEAD:
22 |     POOLER_RESOLUTION: 7
23 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
24 |     POOLER_SAMPLING_RATIO: 2
25 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
26 |     PREDICTOR: "FPNPredictor"
27 | DATASETS:
28 |   TRAIN: ("coco_2014_minival",)
29 |   TEST: ("coco_2014_minival",)
30 | INPUT:
31 |   MIN_SIZE_TRAIN: (600,)
32 |   MAX_SIZE_TRAIN: 1000
33 |   MIN_SIZE_TEST: 800
34 |   MAX_SIZE_TEST: 1000
35 | DATALOADER:
36 |   SIZE_DIVISIBILITY: 32
37 | SOLVER:
38 |   BASE_LR: 0.005
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (1500,)
41 |   MAX_ITER: 2000
42 |   IMS_PER_BATCH: 2
43 | TEST:
44 |   IMS_PER_BATCH: 2
45 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_keypoint_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |     BATCH_SIZE_PER_IMAGE: 256
18 |   ROI_BOX_HEAD:
19 |     POOLER_RESOLUTION: 7
20 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
21 |     POOLER_SAMPLING_RATIO: 2
22 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
23 |     PREDICTOR: "FPNPredictor"
24 |     NUM_CLASSES: 2
25 |   ROI_KEYPOINT_HEAD:
26 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
27 |     FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
28 |     PREDICTOR: "KeypointRCNNPredictor"
29 |     POOLER_RESOLUTION: 14
30 |     POOLER_SAMPLING_RATIO: 2
31 |     RESOLUTION: 56
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   KEYPOINT_ON: True
34 | DATASETS:
35 |   TRAIN: ("keypoints_coco_2014_minival",)
36 |   TEST: ("keypoints_coco_2014_minival",)
37 | INPUT:
38 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
39 |   MAX_SIZE_TRAIN: 1000
40 |   MIN_SIZE_TEST: 800
41 |   MAX_SIZE_TEST: 1000
42 | DATALOADER:
43 |   SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (1500,)
48 |   MAX_ITER: 2000
49 |   IMS_PER_BATCH: 4
50 | TEST:
51 |   IMS_PER_BATCH: 2
52 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 |   ROI_HEADS:
 8 |     BATCH_SIZE_PER_IMAGE: 256
 9 |   ROI_MASK_HEAD:
10 |     PREDICTOR: "MaskRCNNC4Predictor"
11 |     SHARE_BOX_FEATURE_EXTRACTOR: True
12 |   MASK_ON: True
13 | DATASETS:
14 |   TRAIN: ("coco_2014_minival",)
15 |   TEST: ("coco_2014_minival",)
16 | INPUT:
17 |   MIN_SIZE_TRAIN: (600,)
18 |   MAX_SIZE_TRAIN: 1000
19 |   MIN_SIZE_TEST: 800
20 |   MAX_SIZE_TEST: 1000
21 | SOLVER:
22 |   BASE_LR: 0.005
23 |   WEIGHT_DECAY: 0.0001
24 |   STEPS: (1500,)
25 |   MAX_ITER: 2000
26 |   IMS_PER_BATCH: 4
27 | TEST:
28 |   IMS_PER_BATCH: 2
29 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |     BATCH_SIZE_PER_IMAGE: 256
18 |   ROI_BOX_HEAD:
19 |     POOLER_RESOLUTION: 7
20 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
21 |     POOLER_SAMPLING_RATIO: 2
22 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
23 |     PREDICTOR: "FPNPredictor"
24 |   ROI_MASK_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
27 |     PREDICTOR: "MaskRCNNC4Predictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 28
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   MASK_ON: True
33 | DATASETS:
34 |   TRAIN: ("coco_2014_minival",)
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (600,)
38 |   MAX_SIZE_TRAIN: 1000
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1000
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   BASE_LR: 0.005
45 |   WEIGHT_DECAY: 0.0001
46 |   STEPS: (1500,)
47 |   MAX_ITER: 2000
48 |   IMS_PER_BATCH: 4
49 | TEST:
50 |   IMS_PER_BATCH: 2
51 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |     BATCH_SIZE_PER_IMAGE: 256
21 |   ROI_BOX_HEAD:
22 |     POOLER_RESOLUTION: 7
23 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
24 |     POOLER_SAMPLING_RATIO: 2
25 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
26 |     PREDICTOR: "FPNPredictor"
27 |   ROI_MASK_HEAD:
28 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
29 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
30 |     PREDICTOR: "MaskRCNNC4Predictor"
31 |     POOLER_RESOLUTION: 14
32 |     POOLER_SAMPLING_RATIO: 2
33 |     RESOLUTION: 28
34 |     SHARE_BOX_FEATURE_EXTRACTOR: False
35 |   MASK_ON: True
36 | DATASETS:
37 |   TRAIN: ("coco_2014_minival",)
38 |   TEST: ("coco_2014_minival",)
39 | INPUT:
40 |   MIN_SIZE_TRAIN: (600,)
41 |   MAX_SIZE_TRAIN: 1000
42 |   MIN_SIZE_TEST: 800
43 |   MAX_SIZE_TEST: 1000
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 |   BASE_LR: 0.005
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (1500,)
50 |   MAX_ITER: 2000
51 |   IMS_PER_BATCH: 2
52 | TEST:
53 |   IMS_PER_BATCH: 2
54 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RPN:
 6 |     PRE_NMS_TOP_N_TEST: 12000
 7 |     POST_NMS_TOP_N_TEST: 2000
 8 | DATASETS:
 9 |   TRAIN: ("coco_2014_minival",)
10 |   TEST: ("coco_2014_minival",)
11 | INPUT:
12 |   MIN_SIZE_TRAIN: (600,)
13 |   MAX_SIZE_TRAIN: 1000
14 |   MIN_SIZE_TEST: 800
15 |   MAX_SIZE_TEST: 1000
16 | SOLVER:
17 |   BASE_LR: 0.005
18 |   WEIGHT_DECAY: 0.0001
19 |   STEPS: (1500,)
20 |   MAX_ITER: 2000
21 |   IMS_PER_BATCH: 4
22 | TEST:
23 |   IMS_PER_BATCH: 2
24 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   BACKBONE:
 6 |     CONV_BODY: "R-50-FPN"
 7 |   RESNETS:
 8 |     BACKBONE_OUT_CHANNELS: 256
 9 |   RPN:
10 |     USE_FPN: True
11 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 2000
14 |     FPN_POST_NMS_TOP_N_TEST: 2000
15 | DATASETS:
16 |   TRAIN: ("coco_2014_minival",)
17 |   TEST: ("coco_2014_minival",)
18 | INPUT:
19 |   MIN_SIZE_TRAIN: (600,)
20 |   MAX_SIZE_TRAIN: 1000
21 |   MIN_SIZE_TEST: 800
22 |   MAX_SIZE_TEST: 1000
23 | DATALOADER:
24 |   SIZE_DIVISIBILITY: 32
25 | SOLVER:
26 |   BASE_LR: 0.005
27 |   WEIGHT_DECAY: 0.0001
28 |   STEPS: (1500,)
29 |   MAX_ITER: 2000
30 |   IMS_PER_BATCH: 4
31 | TEST:
32 |   IMS_PER_BATCH: 2
33 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-101-FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-101-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (800, )
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assume 4 gpus
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (120000, 160000)
48 |   MAX_ITER: 180000
49 |   IMS_PER_BATCH: 8
50 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-101-FPN_P5_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-101-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     USE_C5: False
32 |     FG_IOU_THRESHOLD: 0.5
33 |     BG_IOU_THRESHOLD: 0.4
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | INPUT:
38 |   MIN_SIZE_TRAIN: (800, )
39 |   MAX_SIZE_TRAIN: 1333
40 |   MIN_SIZE_TEST: 800
41 |   MAX_SIZE_TEST: 1333
42 | DATALOADER:
43 |   SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 |   # Assume 4 gpus
46 |   BASE_LR: 0.005
47 |   WEIGHT_DECAY: 0.0001
48 |   STEPS: (120000, 160000)
49 |   MAX_ITER: 180000
50 |   IMS_PER_BATCH: 8
51 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-50-FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (800,)
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assume 4 gpus
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (120000, 160000)
48 |   MAX_ITER: 180000
49 |   IMS_PER_BATCH: 8
50 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-50-FPN_1x_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_minival",)
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (600,)
38 |   MAX_SIZE_TRAIN: 1000
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1000
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   BASE_LR: 0.005
45 |   WEIGHT_DECAY: 0.0001
46 |   STEPS: (3500,)
47 |   MAX_ITER: 4000
48 |   IMS_PER_BATCH: 4
49 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-50-FPN_P5_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     USE_C5: False
32 |     FG_IOU_THRESHOLD: 0.5
33 |     BG_IOU_THRESHOLD: 0.4
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | INPUT:
38 |   MIN_SIZE_TRAIN: (800,)
39 |   MAX_SIZE_TRAIN: 1333
40 |   MIN_SIZE_TEST: 800
41 |   MAX_SIZE_TEST: 1333
42 | DATALOADER:
43 |   SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 |   # Assume 4 gpus
46 |   BASE_LR: 0.005
47 |   WEIGHT_DECAY: 0.0001
48 |   STEPS: (120000, 160000)
49 |   MAX_ITER: 180000
50 |   IMS_PER_BATCH: 8
51 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-101-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |     STRIDE_IN_1X1: False
11 |     NUM_GROUPS: 32
12 |     WIDTH_PER_GROUP: 8
13 |   RPN:
14 |     USE_FPN: True
15 |     FG_IOU_THRESHOLD: 0.5
16 |     BG_IOU_THRESHOLD: 0.4
17 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
18 |     PRE_NMS_TOP_N_TRAIN: 2000
19 |     PRE_NMS_TOP_N_TEST: 1000
20 |     POST_NMS_TOP_N_TEST: 1000
21 |     FPN_POST_NMS_TOP_N_TEST: 1000
22 |   ROI_HEADS:
23 |     USE_FPN: True
24 |     BATCH_SIZE_PER_IMAGE: 256
25 |   ROI_BOX_HEAD:
26 |     POOLER_RESOLUTION: 7
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     POOLER_SAMPLING_RATIO: 2
29 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
30 |     PREDICTOR: "FPNPredictor"
31 |   RETINANET:
32 |     SCALES_PER_OCTAVE: 3
33 |     STRADDLE_THRESH: -1
34 |     FG_IOU_THRESHOLD: 0.5
35 |     BG_IOU_THRESHOLD: 0.4
36 | DATASETS:
37 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
38 |   TEST: ("coco_2014_minival",)
39 | INPUT:
40 |   MIN_SIZE_TRAIN: (800, )
41 |   MAX_SIZE_TRAIN: 1333
42 |   MIN_SIZE_TEST: 800
43 |   MAX_SIZE_TEST: 1333
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 |   # Assume 4 gpus
48 |   BASE_LR: 0.0025
49 |   WEIGHT_DECAY: 0.0001
50 |   STEPS: (240000, 320000)
51 |   MAX_ITER: 360000
52 |   IMS_PER_BATCH: 4
53 | 


--------------------------------------------------------------------------------
/configs/rpn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   RPN_ONLY: True
 5 |   BACKBONE:
 6 |     CONV_BODY: "R-101-FPN"
 7 |   RESNETS:
 8 |     BACKBONE_OUT_CHANNELS: 256
 9 |   RPN:
10 |     USE_FPN: True
11 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 2000
14 |     FPN_POST_NMS_TOP_N_TEST: 2000
15 | DATASETS:
16 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
17 |   TEST: ("coco_2014_minival",)
18 | DATALOADER:
19 |   SIZE_DIVISIBILITY: 32
20 | SOLVER:
21 |   BASE_LR: 0.02
22 |   WEIGHT_DECAY: 0.0001
23 |   STEPS: (60000, 80000)
24 |   MAX_ITER: 90000
25 | 


--------------------------------------------------------------------------------
/configs/rpn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RPN:
 6 |     PRE_NMS_TOP_N_TEST: 12000
 7 |     POST_NMS_TOP_N_TEST: 2000
 8 | DATASETS:
 9 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
10 |   TEST: ("coco_2014_minival",)
11 | SOLVER:
12 |   BASE_LR: 0.02
13 |   WEIGHT_DECAY: 0.0001
14 |   STEPS: (60000, 80000)
15 |   MAX_ITER: 90000
16 | 


--------------------------------------------------------------------------------
/configs/rpn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   BACKBONE:
 6 |     CONV_BODY: "R-50-FPN"
 7 |   RESNETS:
 8 |     BACKBONE_OUT_CHANNELS: 256
 9 |   RPN:
10 |     USE_FPN: True
11 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 2000
14 |     FPN_POST_NMS_TOP_N_TEST: 2000
15 | DATASETS:
16 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
17 |   TEST: ("coco_2014_minival",)
18 | DATALOADER:
19 |   SIZE_DIVISIBILITY: 32
20 | SOLVER:
21 |   BASE_LR: 0.02
22 |   WEIGHT_DECAY: 0.0001
23 |   STEPS: (60000, 80000)
24 |   MAX_ITER: 90000
25 | 


--------------------------------------------------------------------------------
/configs/rpn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   RPN_ONLY: True
 5 |   BACKBONE:
 6 |     CONV_BODY: "R-101-FPN"
 7 |   RESNETS:
 8 |     BACKBONE_OUT_CHANNELS: 256
 9 |     STRIDE_IN_1X1: False
10 |     NUM_GROUPS: 32
11 |     WIDTH_PER_GROUP: 8
12 |   RPN:
13 |     USE_FPN: True
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 2000
17 |     FPN_POST_NMS_TOP_N_TEST: 2000
18 | DATASETS:
19 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
20 |   TEST: ("coco_2014_minival",)
21 | DATALOADER:
22 |   SIZE_DIVISIBILITY: 32
23 | SOLVER:
24 |   BASE_LR: 0.02
25 |   WEIGHT_DECAY: 0.0001
26 |   STEPS: (60000, 80000)
27 |   MAX_ITER: 90000
28 | 


--------------------------------------------------------------------------------
/configs/test_time_aug/e2e_mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |   ROI_MASK_HEAD:
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 |     PREDICTOR: "MaskRCNNC4Predictor"
27 |     POOLER_RESOLUTION: 14
28 |     POOLER_SAMPLING_RATIO: 2
29 |     RESOLUTION: 28
30 |     SHARE_BOX_FEATURE_EXTRACTOR: False
31 |   MASK_ON: True
32 | DATASETS:
33 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
34 |   TEST: ("coco_2014_minival",)
35 | DATALOADER:
36 |   SIZE_DIVISIBILITY: 32
37 | SOLVER:
38 |   BASE_LR: 0.02
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (60000, 80000)
41 |   MAX_ITER: 90000
42 | TEST:
43 |   BBOX_AUG:
44 |     ENABLED: True
45 |     H_FLIP: True
46 |     SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200)
47 |     MAX_SIZE: 2000
48 |     SCALE_H_FLIP: True
49 | 


--------------------------------------------------------------------------------
/datasets/voc/VOC2007:
--------------------------------------------------------------------------------
1 | /home/chencong/DiscE/web_db/voc/VOCdevkit/VOC2007


--------------------------------------------------------------------------------
/demo/README.md:
--------------------------------------------------------------------------------
 1 | ## Webcam and Jupyter notebook demo
 2 | 
 3 | This folder contains a simple webcam demo that illustrates how you can use `maskrcnn_benchmark` for inference.
 4 | 
 5 | 
 6 | ### With your preferred environment
 7 | 
 8 | You can start it by running it from this folder, using one of the following commands:
 9 | ```bash
10 | # by default, it runs on the GPU
11 | # for best results, use min-image-size 800
12 | python webcam.py --min-image-size 800
13 | # can also run it on the CPU
14 | python webcam.py --min-image-size 300 MODEL.DEVICE cpu
15 | # or change the model that you want to use
16 | python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu
17 | # in order to see the probability heatmaps, pass --show-mask-heatmaps
18 | python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu
19 | ```
20 | 
21 | ### With Docker
22 | 
23 | Build the image with the tag `maskrcnn-benchmark` (check [INSTALL.md](../INSTALL.md) for instructions)
24 | 
25 | Adjust the permissions of the host's X server so the container can open a window (be careful with this step:
26 | `xhost +` turns off X access control; see the [ROS Docker GUI tutorial](http://wiki.ros.org/docker/Tutorials/GUI) for alternatives).
27 | 
28 | ```bash
29 | xhost +
30 | ``` 
31 | 
32 | Then run a container with the demo:
33 |  
34 | ```bash
35 | docker run --rm -it \
36 |     -e DISPLAY=${DISPLAY} \
37 |     --privileged \
38 |     -v /tmp/.X11-unix:/tmp/.X11-unix \
39 |     --device=/dev/video0:/dev/video0 \
40 |     --ipc=host maskrcnn-benchmark \
41 |     python demo/webcam.py --min-image-size 300 \
42 |     --config-file configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml
43 | ```
44 | 
45 | **DISCLAIMER:** *This was tested on an Ubuntu 16.04 machine;
46 | the volume mapping may vary depending on your platform.*
47 | 


--------------------------------------------------------------------------------
/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png


--------------------------------------------------------------------------------
/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png


--------------------------------------------------------------------------------
/demo/webcam.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import argparse
 3 | import cv2
 4 | 
 5 | from maskrcnn_benchmark.config import cfg
 6 | from predictor import COCODemo
 7 | 
 8 | import time
 9 | 
10 | 
11 | def main():
12 |     parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo")
13 |     parser.add_argument(
14 |         "--config-file",
15 |         default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
16 |         metavar="FILE",
17 |         help="path to config file",
18 |     )
19 |     parser.add_argument(
20 |         "--confidence-threshold",
21 |         type=float,
22 |         default=0.7,
23 |         help="Minimum score for the prediction to be shown",
24 |     )
25 |     parser.add_argument(
26 |         "--min-image-size",
27 |         type=int,
28 |         default=224,
29 |         help="Smallest size of the image to feed to the model. "
30 |             "Model was trained with 800, which gives best results",
31 |     )
32 |     parser.add_argument(
33 |         "--show-mask-heatmaps",
34 |         dest="show_mask_heatmaps",
35 |         help="Show a heatmap probability for the top masks-per-dim masks",
36 |         action="store_true",
37 |     )
38 |     parser.add_argument(
39 |         "--masks-per-dim",
40 |         type=int,
41 |         default=2,
42 |         help="Number of heatmaps per dimension to show",
43 |     )
44 |     parser.add_argument(
45 |         "opts",
46 |         help="Modify model config options using the command-line",
47 |         default=None,
48 |         nargs=argparse.REMAINDER,
49 |     )
50 | 
51 |     args = parser.parse_args()
52 | 
53 |     # load config from file and command-line arguments
54 |     cfg.merge_from_file(args.config_file)
55 |     cfg.merge_from_list(args.opts)
56 |     cfg.freeze()
57 | 
58 |     # prepare object that handles inference plus adds predictions on top of image
59 |     coco_demo = COCODemo(
60 |         cfg,
61 |         confidence_threshold=args.confidence_threshold,
62 |         show_mask_heatmaps=args.show_mask_heatmaps,
63 |         masks_per_dim=args.masks_per_dim,
64 |         min_image_size=args.min_image_size,
65 |     )
66 | 
67 |     cam = cv2.VideoCapture(0)
68 |     while True:
69 |         start_time = time.time()
70 |         ret_val, img = cam.read()
71 |         composite = coco_demo.run_on_opencv_image(img)
72 |         print("Time: {:.2f} s / img".format(time.time() - start_time))
73 |         cv2.imshow("COCO detections", composite)
74 |         if cv2.waitKey(1) == 27:
75 |             break  # esc to quit
76 |     cv2.destroyAllWindows()
77 | 
78 | 
79 | if __name__ == "__main__":
80 |     main()
81 | 


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG CUDA="9.0"
 2 | ARG CUDNN="7"
 3 | 
 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04
 5 | 
 6 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
 7 | 
 8 | # install basics
 9 | RUN apt-get update -y \
10 |  && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++ \
11 |  && apt-get install -y libglib2.0-0 libsm6 libxext6 libxrender-dev
12 | 
13 | # Install Miniconda
14 | RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
15 |  && chmod +x /miniconda.sh \
16 |  && /miniconda.sh -b -p /miniconda \
17 |  && rm /miniconda.sh
18 | 
19 | ENV PATH=/miniconda/bin:$PATH
20 | 
21 | # Create a Python 3.6 environment
22 | RUN /miniconda/bin/conda install -y conda-build \
23 |  && /miniconda/bin/conda create -y --name py36 python=3.6.7 \
24 |  && /miniconda/bin/conda clean -ya
25 | 
26 | ENV CONDA_DEFAULT_ENV=py36
27 | ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV
28 | ENV PATH=$CONDA_PREFIX/bin:$PATH
29 | ENV CONDA_AUTO_UPDATE_CONDA=false
30 | 
31 | RUN conda install -y ipython
32 | RUN pip install requests ninja yacs cython matplotlib opencv-python tqdm
33 | 
34 | # Install PyTorch 1.0 Nightly (-y: the image build is non-interactive, so conda must not wait for confirmation)
35 | ARG CUDA
36 | RUN conda install -y pytorch-nightly cudatoolkit=${CUDA} -c pytorch \
37 |  && conda clean -ya
38 | 
39 | # Install TorchVision master
40 | RUN git clone https://github.com/pytorch/vision.git \
41 |  && cd vision \
42 |  && python setup.py install
43 | 
44 | # install pycocotools
45 | RUN git clone https://github.com/cocodataset/cocoapi.git \
46 |  && cd cocoapi/PythonAPI \
47 |  && python setup.py build_ext install
48 | 
49 | # install apex
50 | RUN git clone https://github.com/NVIDIA/apex.git \
51 |  && cd apex \
52 |  && python setup.py install --cuda_ext --cpp_ext
53 | 
54 | # install PyTorch Detection
55 | ARG FORCE_CUDA="1"
56 | ENV FORCE_CUDA=${FORCE_CUDA}
57 | RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \
58 |  && cd maskrcnn-benchmark \
59 |  && python setup.py build develop
60 | 
61 | WORKDIR /maskrcnn-benchmark
62 | 


--------------------------------------------------------------------------------
/docker/docker-jupyter/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG CUDA="9.0"
 2 | ARG CUDNN="7"
 3 | 
 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04
 5 | 
 6 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
 7 | 
 8 | # install basics
 9 | RUN apt-get update -y \
10 |  && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++
11 | 
12 | # Install Miniconda
13 | RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
14 |  && chmod +x /miniconda.sh \
15 |  && /miniconda.sh -b -p /miniconda \
16 |  && rm /miniconda.sh
17 | 
18 | ENV PATH=/miniconda/bin:$PATH
19 | 
20 | # Create a Python 3.6 environment
21 | RUN /miniconda/bin/conda install -y conda-build \
22 |  && /miniconda/bin/conda create -y --name py36 python=3.6.7 \
23 |  && /miniconda/bin/conda clean -ya
24 | 
25 | ENV CONDA_DEFAULT_ENV=py36
26 | ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV
27 | ENV PATH=$CONDA_PREFIX/bin:$PATH
28 | ENV CONDA_AUTO_UPDATE_CONDA=false
29 | 
30 | RUN conda install -y ipython
31 | RUN pip install requests ninja yacs cython matplotlib jupyter tqdm
32 | 
33 | # Install PyTorch Nightly
34 | ARG CUDA
35 | RUN conda install -y pytorch-nightly cudatoolkit=${CUDA} -c pytorch
36 | 
37 | # Install OpenCV
38 | RUN conda install -y opencv -c menpo \
39 |  && conda clean -ya
40 | 
41 | WORKDIR /root
42 | 
43 | USER root
44 | 
45 | RUN mkdir /notebooks
46 | 
47 | WORKDIR /notebooks
48 | 
49 | # Install TorchVision master
50 | RUN git clone https://github.com/pytorch/vision.git \
51 |  && cd vision \
52 |  && python setup.py install
53 | 
54 | # install pycocotools
55 | RUN git clone https://github.com/cocodataset/cocoapi.git \
56 |  && cd cocoapi/PythonAPI \
57 |  && python setup.py build_ext install
58 | 
59 | # install apex
60 | RUN git clone https://github.com/NVIDIA/apex.git \
61 |  && cd apex \
62 |  && python setup.py install --cuda_ext --cpp_ext
63 | 
64 | # install PyTorch Detection
65 | ARG FORCE_CUDA="1"
66 | ENV FORCE_CUDA=${FORCE_CUDA}
67 | RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \
68 |  && cd maskrcnn-benchmark \
69 |  && python setup.py build develop
70 | 
71 | RUN jupyter notebook --generate-config
72 | 
73 | ENV CONFIG_PATH="/root/.jupyter/jupyter_notebook_config.py"
74 | 
75 | COPY "jupyter_notebook_config.py" ${CONFIG_PATH}
76 | 
77 | ENTRYPOINT ["sh", "-c", "jupyter notebook --allow-root -y --no-browser --ip=0.0.0.0 --config=${CONFIG_PATH}"]
78 | 


--------------------------------------------------------------------------------
/docker/docker-jupyter/jupyter_notebook_config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from IPython.lib import passwd
 3 | 
 4 | # Notebook server settings. `get_config` is not defined or imported in this file -- presumably injected by Jupyter when it loads the config; TODO confirm.
 5 | c = get_config()
 6 | c.NotebookApp.ip = '0.0.0.0'  # bind address: all interfaces
 7 | c.NotebookApp.port = int(os.getenv('PORT', 8888))  # port taken from $PORT, falling back to 8888
 8 | c.NotebookApp.open_browser = False
 9 | 
10 | # Configure authentication from the PASSWORD environment variable, if it is set.
11 | if 'PASSWORD' in os.environ:
12 |     password = os.environ['PASSWORD']
13 |     if password:
14 |         c.NotebookApp.password = passwd(password)  # stores the value returned by passwd(), presumably a hash -- confirm
15 |     else:
16 |         c.NotebookApp.password = ''  # PASSWORD set but empty: both password and token are cleared
17 |         c.NotebookApp.token = ''  # NOTE(review): this looks like it disables authentication entirely -- confirm it is intended
18 |     del os.environ['PASSWORD']  # drop the variable from this process's environment once it has been consumed
19 | 


--------------------------------------------------------------------------------
/inference/voc_2007_test/predictions.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/inference/voc_2007_test/predictions.pth


--------------------------------------------------------------------------------
/inference/voc_2007_test/result.txt:
--------------------------------------------------------------------------------
 1 | mAP: 0.7104
 2 | aeroplane       : 0.7574
 3 | bicycle         : 0.7795
 4 | bird            : 0.6859
 5 | boat            : 0.5605
 6 | bottle          : 0.5571
 7 | bus             : 0.7563
 8 | car             : 0.7944
 9 | cat             : 0.8464
10 | chair           : 0.5045
11 | cow             : 0.7578
12 | diningtable     : 0.6476
13 | dog             : 0.8379
14 | horse           : 0.8421
15 | motorbike       : 0.7838
16 | person          : 0.7808
17 | pottedplant     : 0.4596
18 | sheep           : 0.7417
19 | sofa            : 0.6503
20 | train           : 0.7402
21 | tvmonitor       : 0.7237
22 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/_C.cpython-36m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/maskrcnn_benchmark/_C.cpython-36m-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/maskrcnn_benchmark/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/config/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .defaults import _C as cfg
3 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/ROIAlign.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | 
 4 | #include "cpu/vision.h"
 5 | 
 6 | #ifdef WITH_CUDA
 7 | #include "cuda/vision.h"
 8 | #endif
 9 | 
10 | // Interface for Python: device dispatcher for the ROIAlign forward pass (CUDA inputs -> ROIAlign_forward_cuda, anything else -> ROIAlign_forward_cpu).
11 | at::Tensor ROIAlign_forward(const at::Tensor& input,
12 |                             const at::Tensor& rois,
13 |                             const float spatial_scale,
14 |                             const int pooled_height,
15 |                             const int pooled_width,
16 |                             const int sampling_ratio) {
17 |   if (input.type().is_cuda()) {  // the backend is chosen from `input` only
18 | #ifdef WITH_CUDA
19 |     return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
20 | #else
21 |     AT_ERROR("Not compiled with GPU support");  // CUDA tensor, but this build has no WITH_CUDA
22 | #endif
23 |   }
24 |   return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);  // non-CUDA input
25 | }
26 | 
27 | at::Tensor ROIAlign_backward(const at::Tensor& grad,  // device dispatcher for the ROIAlign backward pass; only a CUDA path exists
28 |                              const at::Tensor& rois,
29 |                              const float spatial_scale,
30 |                              const int pooled_height,
31 |                              const int pooled_width,
32 |                              const int batch_size,
33 |                              const int channels,
34 |                              const int height,
35 |                              const int width,
36 |                              const int sampling_ratio) {
37 |   if (grad.type().is_cuda()) {  // the backend is chosen from `grad` only
38 | #ifdef WITH_CUDA
39 |     return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio);
40 | #else
41 |     AT_ERROR("Not compiled with GPU support");  // CUDA tensor, but this build has no WITH_CUDA
42 | #endif
43 |   }
44 |   AT_ERROR("Not implemented on the CPU");  // unlike the forward pass, there is no CPU fallback here
45 | }
46 | 
47 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/ROIPool.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | 
 4 | #include "cpu/vision.h"
 5 | 
 6 | #ifdef WITH_CUDA
 7 | #include "cuda/vision.h"
 8 | #endif
 9 | 
10 | 
11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input,  // device dispatcher for the ROIPool forward pass; only a CUDA path exists
12 |                                 const at::Tensor& rois,
13 |                                 const float spatial_scale,
14 |                                 const int pooled_height,
15 |                                 const int pooled_width) {
16 |   if (input.type().is_cuda()) {  // the backend is chosen from `input` only
17 | #ifdef WITH_CUDA
18 |     return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width);
19 | #else
20 |     AT_ERROR("Not compiled with GPU support");  // CUDA tensor, but this build has no WITH_CUDA
21 | #endif
22 |   }
23 |   AT_ERROR("Not implemented on the CPU");  // non-CUDA input: no CPU implementation is wired in
24 | }
25 | 
26 | at::Tensor ROIPool_backward(const at::Tensor& grad,  // device dispatcher for the ROIPool backward pass; only a CUDA path exists
27 |                                  const at::Tensor& input,
28 |                                  const at::Tensor& rois,
29 |                                  const at::Tensor& argmax,  // presumably the second tensor returned by ROIPool_forward -- confirm against the caller
30 |                                  const float spatial_scale,
31 |                                  const int pooled_height,
32 |                                  const int pooled_width,
33 |                                  const int batch_size,
34 |                                  const int channels,
35 |                                  const int height,
36 |                                  const int width) {
37 |   if (grad.type().is_cuda()) {  // the backend is chosen from `grad` only
38 | #ifdef WITH_CUDA
39 |     return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width);
40 | #else
41 |     AT_ERROR("Not compiled with GPU support");  // CUDA tensor, but this build has no WITH_CUDA
42 | #endif
43 |   }
44 |   AT_ERROR("Not implemented on the CPU");  // non-CUDA input: no CPU implementation is wired in
45 | }
46 | 
47 | 
48 | 
49 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "cpu/vision.h"
 4 | 
 5 | #ifdef WITH_CUDA
 6 | #include "cuda/vision.h"
 7 | #endif
 8 | 
 9 | // Python-facing entry point for the sigmoid focal loss forward pass (CUDA only).
10 | at::Tensor SigmoidFocalLoss_forward(
11 |     const at::Tensor& logits,
12 |     const at::Tensor& targets,
13 |     const int num_classes,
14 |     const float gamma,
15 |     const float alpha) {
16 |   if (!logits.type().is_cuda()) {
17 |     AT_ERROR("Not implemented on the CPU");
18 |   }
19 | #ifdef WITH_CUDA
20 |   return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha);
21 | #else
22 |   AT_ERROR("Not compiled with GPU support");
23 | #endif
24 | }
25 | 
26 | // Python-facing entry point for the sigmoid focal loss backward pass (CUDA only).
27 | at::Tensor SigmoidFocalLoss_backward(
28 |     const at::Tensor& logits,
29 |     const at::Tensor& targets,
30 |     const at::Tensor& d_losses,
31 |     const int num_classes,
32 |     const float gamma, const float alpha) {
33 |   if (!logits.type().is_cuda()) {
34 |     AT_ERROR("Not implemented on the CPU");
35 |   }
36 | #ifdef WITH_CUDA
37 |   return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha);
38 | #else
39 |   AT_ERROR("Not compiled with GPU support");
40 | #endif
41 | }
42 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include "cpu/vision.h"
 3 | // Greedy CPU non-maximum suppression; columns 0..3 of `dets` are read as x1, y1, x2, y2.
 4 | // Returns a 1-D int64 tensor with the indices (ascending, not score order) of the boxes left unsuppressed.
 5 | template <typename scalar_t>
 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets,
 7 |                           const at::Tensor& scores,
 8 |                           const float threshold) {
 9 |   AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
10 |   AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
11 |   AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");
12 |   // Nothing to suppress for empty input: return an empty index tensor.
13 |   if (dets.numel() == 0) {
14 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
15 |   }
16 |   // Copy each coordinate column into its own contiguous buffer so raw pointers can index it.
17 |   auto x1_t = dets.select(1, 0).contiguous();
18 |   auto y1_t = dets.select(1, 1).contiguous();
19 |   auto x2_t = dets.select(1, 2).contiguous();
20 |   auto y2_t = dets.select(1, 3).contiguous();
21 |   // Areas add 1 to each extent (presumably inclusive pixel coordinates).
22 |   at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
23 |   // Box indices ordered from highest to lowest score.
24 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
25 |   // suppressed[k] == 1 marks box k as removed by a higher-scoring box.
26 |   auto ndets = dets.size(0);
27 |   at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
28 |   // Raw data pointers used by the loops below.
29 |   auto suppressed = suppressed_t.data<uint8_t>();
30 |   auto order = order_t.data<int64_t>();
31 |   auto x1 = x1_t.data<scalar_t>();
32 |   auto y1 = y1_t.data<scalar_t>();
33 |   auto x2 = x2_t.data<scalar_t>();
34 |   auto y2 = y2_t.data<scalar_t>();
35 |   auto areas = areas_t.data<scalar_t>();
36 |   // Each surviving box i, visited in score order, suppresses lower-ranked boxes j with IoU >= threshold.
37 |   for (int64_t _i = 0; _i < ndets; _i++) {
38 |     auto i = order[_i];
39 |     if (suppressed[i] == 1)
40 |       continue;
41 |     auto ix1 = x1[i];
42 |     auto iy1 = y1[i];
43 |     auto ix2 = x2[i];
44 |     auto iy2 = y2[i];
45 |     auto iarea = areas[i];
46 |     // Only boxes ranked after i are compared against it.
47 |     for (int64_t _j = _i + 1; _j < ndets; _j++) {
48 |       auto j = order[_j];
49 |       if (suppressed[j] == 1)
50 |         continue;
51 |       auto xx1 = std::max(ix1, x1[j]);
52 |       auto yy1 = std::max(iy1, y1[j]);
53 |       auto xx2 = std::min(ix2, x2[j]);
54 |       auto yy2 = std::min(iy2, y2[j]);
55 |       // Intersection extents, clamped at zero when the boxes do not overlap.
56 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
57 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
58 |       auto inter = w * h;
59 |       auto ovr = inter / (iarea + areas[j] - inter);  // intersection over union
60 |       if (ovr >= threshold)
61 |         suppressed[j] = 1;
62 |    }
63 |   }
64 |   return at::nonzero(suppressed_t == 0).squeeze(1);
65 | }
66 | 
67 | // Runs nms_cpu_kernel instantiated for the floating-point scalar type of `dets`.
68 | at::Tensor nms_cpu(const at::Tensor& dets, const at::Tensor& scores,
69 |                    const float threshold) {
70 |   at::Tensor keep;
71 |   AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
72 |     keep = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
73 |   });
74 |   return keep;
75 | }
76 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/cpu/vision.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include <torch/extension.h>
 4 | // Prototypes of the CPU implementations used by the device-dispatching headers.
 5 | // ROIAlign forward pass for CPU tensors (the definition is not part of this header).
 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input,
 7 |                                 const at::Tensor& rois,
 8 |                                 const float spatial_scale,
 9 |                                 const int pooled_height,
10 |                                 const int pooled_width,
11 |                                 const int sampling_ratio);
12 | 
13 | // CPU non-maximum suppression; defined in cpu/nms_cpu.cpp.
14 | at::Tensor nms_cpu(const at::Tensor& dets,
15 |                    const at::Tensor& scores,
16 |                    const float threshold);
17 | 
18 | // CPU soft-NMS; returns a pair of tensors (presumably kept boxes/indices and rescored values - TODO confirm).
19 | std::pair<at::Tensor, at::Tensor> soft_nms_cpu(const at::Tensor& dets,
20 |                                                const at::Tensor& scores,
21 |                                                const float threshold,
22 |                                                const float sigma);
23 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/deform_pool.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include "cpu/vision.h"
 4 | 
 5 | #ifdef WITH_CUDA
 6 | #include "cuda/vision.h"
 7 | #endif
 8 | 
 9 | 
10 | // Python-facing wrapper for the deformable PS-RoI pooling forward pass.
11 | // Only CUDA tensors are supported. Nothing is returned; presumably the CUDA
12 | // routine fills `out` and `top_count` in place (confirm in cuda/vision.h).
13 | void deform_psroi_pooling_forward(
14 |     at::Tensor input,
15 |     at::Tensor bbox,
16 |     at::Tensor trans,
17 |     at::Tensor out,
18 |     at::Tensor top_count,
19 |     const int no_trans,
20 |     const float spatial_scale,
21 |     const int output_dim,
22 |     const int group_size,
23 |     const int pooled_size,
24 |     const int part_size,
25 |     const int sample_per_part, const float trans_std) {
26 |   // The device check looks at `input` only.
27 |   if (!input.type().is_cuda()) {
28 |     AT_ERROR("Not implemented on the CPU");
29 |   }
30 | #ifdef WITH_CUDA
31 |   deform_psroi_pooling_cuda_forward(
32 |       input, bbox, trans, out, top_count,
33 |       no_trans, spatial_scale, output_dim, group_size,
34 |       pooled_size, part_size, sample_per_part, trans_std);
35 | #else
36 |   AT_ERROR("Not compiled with GPU support");
37 | #endif
38 | }
39 | 
40 | 
41 | // Python-facing wrapper for the deformable PS-RoI pooling backward pass (CUDA only).
42 | // Returns nothing; presumably gradients are written into `input_grad` and `trans_grad`.
43 | void deform_psroi_pooling_backward(
44 |     at::Tensor out_grad,
45 |     at::Tensor input,
46 |     at::Tensor bbox,
47 |     at::Tensor trans,
48 |     at::Tensor top_count,
49 |     at::Tensor input_grad,
50 |     at::Tensor trans_grad,
51 |     const int no_trans,
52 |     const float spatial_scale,
53 |     const int output_dim,
54 |     const int group_size,
55 |     const int pooled_size,
56 |     const int part_size,
57 |     const int sample_per_part,
58 |     const float trans_std) {
59 |   if (!input.type().is_cuda()) {
60 |     AT_ERROR("Not implemented on the CPU");
61 |   }
62 | #ifdef WITH_CUDA
63 |   deform_psroi_pooling_cuda_backward(
64 |       out_grad, input, bbox, trans, top_count, input_grad, trans_grad,
65 |       no_trans, spatial_scale, output_dim, group_size, pooled_size,
66 |       part_size, sample_per_part, trans_std);
67 | #else
68 |   AT_ERROR("Not compiled with GPU support");
69 | #endif
70 | }
71 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/nms.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include "cpu/vision.h"
 4 | 
 5 | #ifdef WITH_CUDA
 6 | #include "cuda/vision.h"
 7 | #endif
 8 | 
 9 | 
10 | // Non-maximum suppression entry point: CUDA tensors go to nms_cuda, everything else to nms_cpu.
11 | at::Tensor nms(const at::Tensor& dets,
12 |                const at::Tensor& scores,
13 |                const float threshold) {
14 |   if (!dets.type().is_cuda()) {
15 |     return nms_cpu(dets, scores, threshold);
16 |   }
17 | #ifdef WITH_CUDA
18 |   if (dets.numel() == 0) {
19 |     // NOTE(review): the empty result is allocated on the CPU even though `dets` is a CUDA tensor.
20 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
21 |   }
22 |   // nms_cuda takes a single tensor: the scores are appended to the boxes as the last column.
23 |   auto packed = at::cat({dets, scores.unsqueeze(1)}, 1);
24 |   return nms_cuda(packed, threshold);
25 | #else
26 |   AT_ERROR("Not compiled with GPU support");
27 | #endif
28 | }
29 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/soft_nms.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include "cpu/vision.h"
 4 | 
 5 | #ifdef WITH_CUDA
 6 | #include "cuda/vision.h"
 7 | #endif
 8 | 
 9 | 
10 | // Soft-NMS entry point; forwards to soft_nms_cpu and returns its pair of tensors.
11 | // Only a CPU implementation exists, so CUDA tensors are always rejected with an
12 | // error. The check must not sit behind WITH_CUDA: in a build without that macro
13 | // a CUDA tensor would otherwise be handed straight to the CPU kernel.
14 | std::pair<at::Tensor, at::Tensor> soft_nms(const at::Tensor& dets,
15 |                                            const at::Tensor& scores,
16 |                                            const float threshold,
17 |                                            const float sigma) {
18 |   // The device is judged from `dets` alone.
19 |   if (dets.type().is_cuda()) {
20 |     AT_ERROR("Soft NMS Does Not have GPU support");
21 |   }
22 | 
23 |   return soft_nms_cpu(dets, scores, threshold, sigma);
24 | }
25 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/vision.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include "nms.h"
 3 | #include "ROIAlign.h"
 4 | #include "ROIPool.h"
 5 | #include "soft_nms.h"
 6 | #include "SigmoidFocalLoss.h"
 7 | #include "deform_conv.h"
 8 | #include "deform_pool.h"
 9 | // Python bindings: every m.def below registers one function from the headers included above.
10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
11 |   m.def("soft_nms", &soft_nms, "soft-non-maximum suppression");  // CPU only, see soft_nms.h
12 |   m.def("nms", &nms, "non-maximum suppression");
13 |   m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
14 |   m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
15 |   m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward");
16 |   m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward");
17 |   m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward");
18 |   m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward");
19 |   // dcn-v2: deformable convolution and deformable PS-RoI pooling ops
20 |   m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward");
21 |   m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input");
22 |   m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters");
23 |   m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward");
24 |   m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward");
25 |   m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward");
26 |   m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward");
27 | }


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .build import make_data_loader
3 | from .build_semi import make_data_loader_semi


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/collate_batch.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from maskrcnn_benchmark.structures.image_list import to_image_list
 3 | 
 4 | 
 5 | class BatchCollator(object):
 6 |     """
 7 |     From a list of samples from the dataset,
 8 |     returns the batched images and targets.
 9 |     This should be passed to the DataLoader
10 |     """
11 | 
12 |     def __init__(self, size_divisible=0):
13 |         self.size_divisible = size_divisible
14 | 
15 |     def __call__(self, batch):
16 |         transposed_batch = list(zip(*batch))
17 |         images = to_image_list(transposed_batch[0], self.size_divisible)
18 |         targets = transposed_batch[1]
19 |         img_ids = transposed_batch[2]
20 |         return images, targets, img_ids
21 | 
22 | 
23 | class BBoxAugCollator(object):
24 |     """
25 |     From a list of samples from the dataset,
26 |     returns the images and targets.
27 |     Images should be converted to batched images in `im_detect_bbox_aug`
28 |     """
29 | 
30 |     def __call__(self, batch):
31 |         return list(zip(*batch))
32 | 
33 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .coco import COCODataset
3 | from .voc import PascalVOCDataset
4 | from .concat_dataset import ConcatDataset
5 | from .unlabeled_img import UnlabeledDataset
6 | 
7 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset","UnlabeledDataset"]
8 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/concat_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import bisect
 3 | 
 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
 5 | 
 6 | 
 7 | class ConcatDataset(_ConcatDataset):
 8 |     """
 9 |     Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra
10 |     method for querying the sizes of the image
11 |     """
12 | 
13 |     def get_idxs(self, idx):
14 |         dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
15 |         if dataset_idx == 0:
16 |             sample_idx = idx
17 |         else:
18 |             sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
19 |         return dataset_idx, sample_idx
20 | 
21 |     def get_img_info(self, idx):
22 |         dataset_idx, sample_idx = self.get_idxs(idx)
23 |         return self.datasets[dataset_idx].get_img_info(sample_idx)
24 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/__init__.py:
--------------------------------------------------------------------------------
 1 | from maskrcnn_benchmark.data import datasets
 2 | 
 3 | from .coco import coco_evaluation
 4 | from .voc import voc_evaluation
 5 | 
 6 | 
 7 | def evaluate(dataset, predictions, output_folder, **kwargs):
 8 |     """evaluate dataset using different methods based on dataset type.
 9 |     Args:
10 |         dataset: Dataset object
11 |         predictions(list[BoxList]): each item in the list represents the
12 |             prediction results for one image.
13 |         output_folder: output folder, to save evaluation files or results.
14 |         **kwargs: other args.
15 |     Returns:
16 |         evaluation result
17 |     """
18 |     args = dict(
19 |         dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs
20 |     )
21 |     if isinstance(dataset, datasets.COCODataset):
22 |         return coco_evaluation(**args)
23 |     elif isinstance(dataset, datasets.PascalVOCDataset):
24 |         return voc_evaluation(**args)
25 |     else:
26 |         dataset_name = dataset.__class__.__name__
27 |         raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name))
28 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py:
--------------------------------------------------------------------------------
 1 | from .coco_eval import do_coco_evaluation
 2 | 
 3 | 
 4 | def coco_evaluation(
 5 |     dataset,
 6 |     predictions,
 7 |     output_folder,
 8 |     box_only,
 9 |     iou_types,
10 |     expected_results,
11 |     expected_results_sigma_tol,
12 | ):
13 |     return do_coco_evaluation(
14 |         dataset=dataset,
15 |         predictions=predictions,
16 |         box_only=box_only,
17 |         output_folder=output_folder,
18 |         iou_types=iou_types,
19 |         expected_results=expected_results,
20 |         expected_results_sigma_tol=expected_results_sigma_tol,
21 |     )
22 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from .voc_eval import do_voc_evaluation
 4 | 
 5 | 
 6 | def voc_evaluation(dataset, predictions, output_folder, box_only, **_):
 7 |     logger = logging.getLogger("maskrcnn_benchmark.inference")
 8 |     if box_only:
 9 |         logger.warning("voc evaluation doesn't support box_only, ignored.")
10 |     logger.info("performing voc evaluation, ignored iou_types.")
11 |     return do_voc_evaluation(
12 |         dataset=dataset,
13 |         predictions=predictions,
14 |         output_folder=output_folder,
15 |         logger=logger,
16 |     )
17 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/list_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | """
 3 | Simple dataset class that wraps a list of path names
 4 | """
 5 | 
 6 | from PIL import Image
 7 | 
 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList
 9 | 
10 | 
11 | class ListDataset(object):
12 |     def __init__(self, image_lists, transforms=None):
13 |         self.image_lists = image_lists
14 |         self.transforms = transforms
15 | 
16 |     def __getitem__(self, item):
17 |         img = Image.open(self.image_lists[item]).convert("RGB")
18 | 
19 |         # dummy target
20 |         w, h = img.size
21 |         target = BoxList([[0, 0, w, h]], img.size, mode="xyxy")
22 | 
23 |         if self.transforms is not None:
24 |             img, target = self.transforms(img, target)
25 | 
26 |         return img, target
27 | 
28 |     def __len__(self):
29 |         return len(self.image_lists)
30 | 
31 |     def get_img_info(self, item):
32 |         """
33 |         Return the image dimensions for the image, without
34 |         loading and pre-processing it
35 |         """
36 |         pass
37 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .distributed import DistributedSampler
3 | from .grouped_batch_sampler import GroupedBatchSampler
4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler
5 | 
6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"]
7 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from torch.utils.data.sampler import BatchSampler
 3 | 
 4 | 
 5 | class IterationBasedBatchSampler(BatchSampler):
 6 |     """
 7 |     Wraps a BatchSampler, resampling from it until
 8 |     a specified number of iterations have been sampled
 9 |     """
10 | 
11 |     def __init__(self, batch_sampler, num_iterations, start_iter=0):
12 |         self.batch_sampler = batch_sampler
13 |         self.num_iterations = num_iterations
14 |         self.start_iter = start_iter
15 | 
16 |     def __iter__(self):
17 |         iteration = self.start_iter
18 |         while iteration <= self.num_iterations:
19 |             # if the underlying sampler has a set_epoch method, like
20 |             # DistributedSampler, used for making each process see
21 |             # a different split of the dataset, then set it
22 |             if hasattr(self.batch_sampler.sampler, "set_epoch"):
23 |                 self.batch_sampler.sampler.set_epoch(iteration)
24 |             for batch in self.batch_sampler:
25 |                 iteration += 1
26 |                 if iteration > self.num_iterations:
27 |                     break
28 |                 yield batch
29 | 
30 |     def __len__(self):
31 |         return self.num_iterations
32 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .transforms import Compose
3 | from .transforms import Resize
4 | from .transforms import RandomHorizontalFlip
5 | from .transforms import ToTensor
6 | from .transforms import Normalize
7 | from .transforms_reverse import trans_reverse
8 | from .build import build_transforms
9 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/transforms/build.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from . import transforms as T
 3 | from . import transforms_reverse as T_R
 4 | 
 5 | 
 6 | def build_transforms(cfg, is_train=True):
 7 |     if is_train:
 8 |         min_size = cfg.INPUT.MIN_SIZE_TRAIN
 9 |         max_size = cfg.INPUT.MAX_SIZE_TRAIN
10 |         flip_horizontal_prob = 0.5  # cfg.INPUT.FLIP_PROB_TRAIN
11 |         flip_vertical_prob = cfg.INPUT.VERTICAL_FLIP_PROB_TRAIN
12 |         brightness = cfg.INPUT.BRIGHTNESS
13 |         contrast = cfg.INPUT.CONTRAST
14 |         saturation = cfg.INPUT.SATURATION
15 |         hue = cfg.INPUT.HUE
16 |     else:
17 |         min_size = cfg.INPUT.MIN_SIZE_TEST
18 |         max_size = cfg.INPUT.MAX_SIZE_TEST
19 |         flip_horizontal_prob = 0.0
20 |         flip_vertical_prob = 0.0
21 |         brightness = 0.0
22 |         contrast = 0.0
23 |         saturation = 0.0
24 |         hue = 0.0
25 | 
26 |     if cfg.INPUT.SEMI:
27 |         TRANS = T_R
28 |     else:
29 |         TRANS = T
30 | 
31 | 
32 |     to_bgr255 = cfg.INPUT.TO_BGR255
33 |     normalize_transform = TRANS.Normalize(
34 |         mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255
35 |     )
36 |     color_jitter = TRANS.ColorJitter(
37 |         brightness=brightness,
38 |         contrast=contrast,
39 |         saturation=saturation,
40 |         hue=hue,
41 |     )
42 | 
43 |     transform = TRANS.Compose(
44 |         [
45 |             color_jitter,
46 |             TRANS.Resize(min_size, max_size),
47 |             TRANS.RandomHorizontalFlip(flip_horizontal_prob),
48 |             TRANS.RandomVerticalFlip(flip_vertical_prob),
49 |             TRANS.ToTensor(),
50 |             normalize_transform,
51 |         ]
52 |     )
53 |     return transform
54 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | 
 4 | from .batch_norm import FrozenBatchNorm2d
 5 | from .misc import Conv2d
 6 | from .misc import DFConv2d
 7 | from .misc import ConvTranspose2d
 8 | from .misc import BatchNorm2d
 9 | from .misc import interpolate
10 | from .nms import nms
11 | from .roi_align import ROIAlign
12 | from .roi_align import roi_align
13 | from .roi_pool import ROIPool
14 | from .roi_pool import roi_pool
15 | from .smooth_l1_loss import smooth_l1_loss
16 | from .sigmoid_focal_loss import SigmoidFocalLoss
17 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv
18 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, ModulatedDeformConvPack
19 | from .dcn.deform_pool_func import deform_roi_pooling
20 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack
21 | 
22 | 
23 | __all__ = [
24 |     "nms",
25 |     "roi_align",
26 |     "ROIAlign",
27 |     "roi_pool",
28 |     "ROIPool",
29 |     "smooth_l1_loss",
30 |     "Conv2d",
31 |     "DFConv2d",
32 |     "ConvTranspose2d",
33 |     "interpolate",
34 |     "BatchNorm2d",
35 |     "FrozenBatchNorm2d",
36 |     "SigmoidFocalLoss",
37 |     'deform_conv',
38 |     'modulated_deform_conv',
39 |     'DeformConv',
40 |     'ModulatedDeformConv',
41 |     'ModulatedDeformConvPack',
42 |     'deform_roi_pooling',
43 |     'DeformRoIPooling',
44 |     'DeformRoIPoolingPack',
45 |     'ModulatedDeformRoIPoolingPack',
46 | ]
47 | 
48 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import glob
 3 | import os.path
 4 | 
 5 | import torch
 6 | 
 7 | try:
 8 |     from torch.utils.cpp_extension import load as load_ext
 9 |     from torch.utils.cpp_extension import CUDA_HOME
10 | except ImportError:
11 |     raise ImportError("The cpp layer extensions requires PyTorch 0.4 or higher")
12 | 
13 | 
14 | def _load_C_extensions():
15 |     this_dir = os.path.dirname(os.path.abspath(__file__))
16 |     this_dir = os.path.dirname(this_dir)
17 |     this_dir = os.path.join(this_dir, "csrc")
18 | 
19 |     main_file = glob.glob(os.path.join(this_dir, "*.cpp"))
20 |     source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp"))
21 |     source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu"))
22 | 
23 |     source = main_file + source_cpu
24 | 
25 |     extra_cflags = []
26 |     if torch.cuda.is_available() and CUDA_HOME is not None:
27 |         source.extend(source_cuda)
28 |         extra_cflags = ["-DWITH_CUDA"]
29 |     source = [os.path.join(this_dir, s) for s in source]
30 |     extra_include_paths = [this_dir]
31 |     return load_ext(
32 |         "torchvision",
33 |         source,
34 |         extra_cflags=extra_cflags,
35 |         extra_include_paths=extra_include_paths,
36 |     )
37 | 
38 | 
39 | _C = _load_C_extensions()
40 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/batch_norm.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | from torch import nn
 4 | 
 5 | 
 6 | class FrozenBatchNorm2d(nn.Module):
 7 |     """
 8 |     BatchNorm2d where the batch statistics and the affine parameters
 9 |     are fixed
10 |     """
11 | 
12 |     def __init__(self, n):
13 |         super(FrozenBatchNorm2d, self).__init__()
14 |         self.register_buffer("weight", torch.ones(n))
15 |         self.register_buffer("bias", torch.zeros(n))
16 |         self.register_buffer("running_mean", torch.zeros(n))
17 |         self.register_buffer("running_var", torch.ones(n))
18 | 
19 |     def forward(self, x):
20 |         # Cast all fixed parameters to half() if necessary
21 |         if x.dtype == torch.float16:
22 |             self.weight = self.weight.half()
23 |             self.bias = self.bias.half()
24 |             self.running_mean = self.running_mean.half()
25 |             self.running_var = self.running_var.half()
26 | 
27 |         scale = self.weight * self.running_var.rsqrt()
28 |         bias = self.bias - self.running_mean * scale
29 |         scale = scale.reshape(1, -1, 1, 1)
30 |         bias = bias.reshape(1, -1, 1, 1)
31 |         return x * scale + bias
32 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/dcn/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn)
3 | #
4 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/nms.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | # from ._utils import _C
 3 | from maskrcnn_benchmark import _C
 4 | 
 5 | from apex import amp
 6 | 
 7 | # Only valid with fp32 inputs - give AMP the hint
 8 | nms = amp.float_function(_C.nms)
 9 | 
10 | # nms.__doc__ = """
11 | # This function performs Non-maximum suppression"""
12 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/roi_align.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | from torch import nn
 4 | from torch.autograd import Function
 5 | from torch.autograd.function import once_differentiable
 6 | from torch.nn.modules.utils import _pair
 7 | 
 8 | from maskrcnn_benchmark import _C
 9 | 
10 | from apex import amp
11 | 
class _ROIAlign(Function):
    """Autograd function around the compiled RoIAlign forward/backward ops."""

    @staticmethod
    def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio):
        """
        Run ``_C.roi_align_forward`` and stash what ``backward`` needs.

        Arguments:
            input (Tensor): feature tensor; its ``size()`` is unpacked as
                ``(bs, ch, h, w)`` in ``backward``.
            roi (Tensor): regions handed to the compiled op unchanged
                (layout is defined by the extension -- not visible here).
            output_size (int or pair): pooled height/width; an int is
                expanded to a pair with ``_pair``.
            spatial_scale: forwarded to the compiled op.
            sampling_ratio: forwarded to the compiled op.

        Returns:
            The tensor produced by ``_C.roi_align_forward``.
        """
        ctx.save_for_backward(roi)
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = spatial_scale
        ctx.sampling_ratio = sampling_ratio
        ctx.input_shape = input.size()
        # Index the normalized pair rather than the raw argument, so that a
        # plain int ``output_size`` works instead of raising TypeError.
        pooled_h, pooled_w = ctx.output_size[0], ctx.output_size[1]
        output = _C.roi_align_forward(
            input, roi, spatial_scale, pooled_h, pooled_w, sampling_ratio
        )
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """
        Run ``_C.roi_align_backward`` with the settings saved in ``forward``.

        Returns one entry per ``forward`` input: the gradient for ``input``
        followed by ``None`` for the four remaining inputs.
        """
        rois, = ctx.saved_tensors
        output_size = ctx.output_size
        spatial_scale = ctx.spatial_scale
        sampling_ratio = ctx.sampling_ratio
        bs, ch, h, w = ctx.input_shape
        grad_input = _C.roi_align_backward(
            grad_output,
            rois,
            spatial_scale,
            output_size[0],
            output_size[1],
            bs,
            ch,
            h,
            w,
            sampling_ratio,
        )
        return grad_input, None, None, None, None


# Functional entry point used by the ROIAlign module.
roi_align = _ROIAlign.apply
49 | 
class ROIAlign(nn.Module):
    """
    Module that stores the RoIAlign settings and applies ``roi_align``.
    """

    def __init__(self, output_size, spatial_scale, sampling_ratio):
        """
        Arguments:
            output_size: pooled output size forwarded to ``roi_align``
            spatial_scale: scale forwarded to ``roi_align``
            sampling_ratio: sampling ratio forwarded to ``roi_align``
        """
        super(ROIAlign, self).__init__()
        self.output_size = output_size
        self.spatial_scale = spatial_scale
        self.sampling_ratio = sampling_ratio

    @amp.float_function
    def forward(self, input, rois):
        """Apply ``roi_align`` to ``input`` and ``rois`` with the stored settings."""
        settings = (self.output_size, self.spatial_scale, self.sampling_ratio)
        return roi_align(input, rois, *settings)

    def __repr__(self):
        shown = (
            ("output_size", self.output_size),
            ("spatial_scale", self.spatial_scale),
            ("sampling_ratio", self.sampling_ratio),
        )
        body = ", ".join(label + "=" + str(value) for label, value in shown)
        return self.__class__.__name__ + "(" + body + ")"
70 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/roi_pool.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | from torch import nn
 4 | from torch.autograd import Function
 5 | from torch.autograd.function import once_differentiable
 6 | from torch.nn.modules.utils import _pair
 7 | 
 8 | from maskrcnn_benchmark import _C
 9 | 
10 | from apex import amp
11 | 
class _ROIPool(Function):
    """Autograd function around the compiled RoIPool forward/backward ops."""

    @staticmethod
    def forward(ctx, input, roi, output_size, spatial_scale):
        """
        Run ``_C.roi_pool_forward`` and stash what ``backward`` needs.

        Arguments:
            input (Tensor): feature tensor; its ``size()`` is unpacked as
                ``(bs, ch, h, w)`` in ``backward``.
            roi (Tensor): regions handed to the compiled op unchanged
                (layout is defined by the extension -- not visible here).
            output_size (int or pair): pooled height/width; an int is
                expanded to a pair with ``_pair``.
            spatial_scale: forwarded to the compiled op.

        Returns:
            The pooled tensor returned by ``_C.roi_pool_forward`` (the
            ``argmax`` it also returns is only kept for ``backward``).
        """
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = spatial_scale
        ctx.input_shape = input.size()
        # Index the normalized pair rather than the raw argument, so that a
        # plain int ``output_size`` works instead of raising TypeError.
        pooled_h, pooled_w = ctx.output_size[0], ctx.output_size[1]
        output, argmax = _C.roi_pool_forward(
            input, roi, spatial_scale, pooled_h, pooled_w
        )
        ctx.save_for_backward(input, roi, argmax)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """
        Run ``_C.roi_pool_backward`` with the tensors/settings saved in ``forward``.

        Returns one entry per ``forward`` input: the gradient for ``input``
        followed by ``None`` for the three remaining inputs.
        """
        input, rois, argmax = ctx.saved_tensors
        output_size = ctx.output_size
        spatial_scale = ctx.spatial_scale
        bs, ch, h, w = ctx.input_shape
        grad_input = _C.roi_pool_backward(
            grad_output,
            input,
            rois,
            argmax,
            spatial_scale,
            output_size[0],
            output_size[1],
            bs,
            ch,
            h,
            w,
        )
        return grad_input, None, None, None


# Functional entry point used by the ROIPool module.
roi_pool = _ROIPool.apply
48 | 
49 | 
class ROIPool(nn.Module):
    """
    Module that stores the RoIPool settings and applies ``roi_pool``.
    """

    def __init__(self, output_size, spatial_scale):
        """
        Arguments:
            output_size: pooled output size forwarded to ``roi_pool``
            spatial_scale: scale forwarded to ``roi_pool``
        """
        super(ROIPool, self).__init__()
        self.output_size = output_size
        self.spatial_scale = spatial_scale

    @amp.float_function
    def forward(self, input, rois):
        """Apply ``roi_pool`` to ``input`` and ``rois`` with the stored settings."""
        settings = (self.output_size, self.spatial_scale)
        return roi_pool(input, rois, *settings)

    def __repr__(self):
        shown = (
            ("output_size", self.output_size),
            ("spatial_scale", self.spatial_scale),
        )
        body = ", ".join(label + "=" + str(value) for label, value in shown)
        return self.__class__.__name__ + "(" + body + ")"
66 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | from torch.autograd import Function
 4 | from torch.autograd.function import once_differentiable
 5 | 
 6 | from maskrcnn_benchmark import _C
 7 | 
 8 | # TODO: Use JIT to replace CUDA implementation in the future.
 9 | class _SigmoidFocalLoss(Function):
10 |     @staticmethod
11 |     def forward(ctx, logits, targets, gamma, alpha):
12 |         ctx.save_for_backward(logits, targets)
13 |         num_classes = logits.shape[1]
14 |         ctx.num_classes = num_classes
15 |         ctx.gamma = gamma
16 |         ctx.alpha = alpha
17 | 
18 |         losses = _C.sigmoid_focalloss_forward(
19 |             logits, targets, num_classes, gamma, alpha
20 |         )
21 |         return losses
22 | 
23 |     @staticmethod
24 |     @once_differentiable
25 |     def backward(ctx, d_loss):
26 |         logits, targets = ctx.saved_tensors
27 |         num_classes = ctx.num_classes
28 |         gamma = ctx.gamma
29 |         alpha = ctx.alpha
30 |         d_loss = d_loss.contiguous()
31 |         d_logits = _C.sigmoid_focalloss_backward(
32 |             logits, targets, d_loss, num_classes, gamma, alpha
33 |         )
34 |         return d_logits, None, None, None, None
35 | 
36 | 
37 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply
38 | 
39 | 
def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha):
    """
    Pure-PyTorch sigmoid focal loss (element-wise, not reduced).

    Arguments:
        logits (Tensor): scores; ``logits.shape[1]`` is the number of classes.
        targets (Tensor): per-row class labels. Labels are compared against
            the range ``1..num_classes``; a label matching none of them
            contributes only the negative term, and a label ``< 0``
            contributes nothing.
        gamma: focusing exponent. Either a plain number or an indexable
            (list / tuple / tensor) whose first element is used.
        alpha: weight of the positive term (``1 - alpha`` weighs the
            negative term). Same accepted forms as ``gamma``.

    Returns:
        Tensor with the same shape as ``logits`` holding the per-element loss.
    """

    def _first_or_scalar(value):
        # Historically callers passed one-element containers and ``[0]`` was
        # taken unconditionally, which crashed on plain floats (the form the
        # CUDA path and the config use). Accept both.
        try:
            return value[0]
        except (TypeError, IndexError):
            return value

    num_classes = logits.shape[1]
    gamma = _first_or_scalar(gamma)
    alpha = _first_or_scalar(alpha)
    dtype = targets.dtype
    device = targets.device
    # Row vector of class ids 1..num_classes to broadcast against the labels.
    class_range = torch.arange(
        1, num_classes + 1, dtype=dtype, device=device
    ).unsqueeze(0)

    t = targets.unsqueeze(1)
    p = torch.sigmoid(logits)
    term1 = (1 - p) ** gamma * torch.log(p)
    term2 = p ** gamma * torch.log(1 - p)

    positive = (t == class_range).float()
    # Negatives are all non-matching classes of rows whose label is >= 0.
    negative = ((t != class_range) * (t >= 0)).float()
    return -positive * term1 * alpha - negative * term2 * (1 - alpha)
53 | 
54 | 
class SigmoidFocalLoss(nn.Module):
    """
    Sigmoid focal loss module, summed over all elements.

    Dispatches to ``sigmoid_focal_loss_cuda`` for CUDA logits and to
    ``sigmoid_focal_loss_cpu`` otherwise.
    """

    def __init__(self, gamma, alpha):
        """
        Arguments:
            gamma: focusing parameter forwarded to the loss function
            alpha: balancing parameter forwarded to the loss function
        """
        super(SigmoidFocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, logits, targets):
        """Return the scalar sum of the element-wise focal loss."""
        # The device decides the implementation; the unused ``device`` local
        # that used to be read here has been dropped.
        if logits.is_cuda:
            loss_func = sigmoid_focal_loss_cuda
        else:
            loss_func = sigmoid_focal_loss_cpu

        loss = loss_func(logits, targets, self.gamma, self.alpha)
        return loss.sum()

    def __repr__(self):
        tmpstr = self.__class__.__name__ + "("
        tmpstr += "gamma=" + str(self.gamma)
        tmpstr += ", alpha=" + str(self.alpha)
        tmpstr += ")"
        return tmpstr
77 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/smooth_l1_loss.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | 
 4 | 
 5 | # TODO maybe push this to nn?
 6 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
 7 |     """
 8 |     very similar to the smooth_l1_loss from pytorch, but with
 9 |     the extra beta parameter
10 |     """
11 |     n = torch.abs(input - target)
12 |     cond = n < beta
13 |     loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
14 |     if size_average:
15 |         return loss.mean()
16 |     return loss.sum()
17 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/maskrcnn_benchmark/modeling/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .backbone import build_backbone
3 | from . import fbnet
4 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/detector/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .detectors import build_detection_model
3 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/detector/detectors.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from .generalized_rcnn import GeneralizedRCNN
 3 | 
 4 | 
 5 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN}
 6 | 
 7 | 
 8 | def build_detection_model(cfg):
 9 |     meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE]
10 |     return meta_arch(cfg)
11 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/detector/generalized_rcnn.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | """
 3 | Implements the Generalized R-CNN framework
 4 | """
 5 | 
 6 | import torch
 7 | from torch import nn
 8 | 
 9 | from maskrcnn_benchmark.structures.image_list import to_image_list
10 | 
11 | from ..backbone import build_backbone
12 | from ..rpn.rpn import build_rpn
13 | from ..roi_heads.roi_heads import build_roi_heads
14 | 
15 | 
class GeneralizedRCNN(nn.Module):
    """
    Main class for Generalized R-CNN. Currently supports boxes and masks.
    It is assembled from three parts:
    - backbone: turns the image tensor into features
    - rpn: produces proposals (and its losses) from images and features
    - roi_heads: consumes the features and the RPN proposals to compute
        detections / masks. May be empty for RPN-only models.
    """

    def __init__(self, cfg):
        super(GeneralizedRCNN, self).__init__()

        backbone = build_backbone(cfg)
        self.backbone = backbone
        # Both the RPN and the heads are sized from the backbone's output width.
        self.rpn = build_rpn(cfg, backbone.out_channels)
        self.roi_heads = build_roi_heads(cfg, backbone.out_channels)

    def forward(self, images, targets=None):
        """
        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            During training, a tuple ``(losses, result)``: ``losses`` is a
            dict merging the head losses and the RPN losses; ``result`` is
            the second output of the heads or, for RPN-only models, the
            RPN result with the proposals stored under the ``anchors`` key.
            During testing, only ``result`` is returned: the second output
            of the heads or, for RPN-only models, the proposals.

        Raises:
            ValueError: if the module is in training mode and ``targets`` is None.
        """
        if targets is None and self.training:
            raise ValueError("In training mode, targets should be passed")

        image_list = to_image_list(images)
        features = self.backbone(image_list.tensors)
        proposals, result_rpn, proposal_losses = self.rpn(
            image_list, features, targets
        )

        if self.roi_heads:
            # The first output (head features) is not used by this class.
            _, result, detector_losses = self.roi_heads(
                features, proposals, targets
            )
        elif self.training:
            # RPN-only models don't have roi_heads
            detector_losses = {}
            result = result_rpn
            result['anchors'] = proposals
        else:
            # RPN-only models don't have roi_heads
            detector_losses = {}
            result = proposals

        if not self.training:
            return result

        losses = {}
        for partial_losses in (detector_losses, proposal_losses):
            losses.update(partial_losses)
        return losses, result
71 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | from maskrcnn_benchmark.utils.registry import Registry
 4 | 
# One Registry instance per family of pluggable components. Implementations
# register themselves under a string name with the ``register`` decorator
# (e.g. ``@registry.ROI_BOX_PREDICTOR.register("FPNPredictor")``) and
# factories look them up by the name given in the config
# (e.g. ``registry.ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR]``).
BACKBONES = Registry()
RPN_HEADS = Registry()
ROI_BOX_FEATURE_EXTRACTORS = Registry()
ROI_BOX_PREDICTOR = Registry()
ROI_KEYPOINT_FEATURE_EXTRACTORS = Registry()
ROI_KEYPOINT_PREDICTOR = Registry()
ROI_MASK_FEATURE_EXTRACTORS = Registry()
ROI_MASK_PREDICTOR = Registry()
13 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/maskrcnn_benchmark/modeling/roi_heads/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from maskrcnn_benchmark.modeling import registry
 3 | from torch import nn
 4 | 
 5 | 
 6 | @registry.ROI_BOX_PREDICTOR.register("FastRCNNPredictor")
 7 | class FastRCNNPredictor(nn.Module):
 8 |     def __init__(self, config, in_channels):
 9 |         super(FastRCNNPredictor, self).__init__()
10 |         assert in_channels is not None
11 | 
12 |         num_inputs = in_channels
13 | 
14 |         num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES
15 |         self.avgpool = nn.AdaptiveAvgPool2d(1)
16 |         self.cls_score = nn.Linear(num_inputs, num_classes)
17 |         num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes
18 |         self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4)
19 | 
20 |         nn.init.normal_(self.cls_score.weight, mean=0, std=0.01)
21 |         nn.init.constant_(self.cls_score.bias, 0)
22 | 
23 |         nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001)
24 |         nn.init.constant_(self.bbox_pred.bias, 0)
25 | 
26 |     def forward(self, x):
27 |         x = self.avgpool(x)
28 |         x = x.view(x.size(0), -1)
29 |         cls_logit = self.cls_score(x)
30 |         bbox_pred = self.bbox_pred(x)
31 |         return cls_logit, bbox_pred
32 | 
33 | 
@registry.ROI_BOX_PREDICTOR.register("FPNPredictor")
class FPNPredictor(nn.Module):
    """
    Box predictor made of two linear layers (class scores and box
    regression) applied to an already flattened representation.
    """

    def __init__(self, cfg, in_channels):
        super(FPNPredictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES

        self.cls_score = nn.Linear(in_channels, num_classes)
        # Class-agnostic regression predicts boxes for 2 groups instead of
        # one group per class.
        box_groups = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes
        self.bbox_pred = nn.Linear(in_channels, box_groups * 4)

        # Gaussian weights first (narrower for the regressor), then zero biases.
        for layer, std in ((self.cls_score, 0.01), (self.bbox_pred, 0.001)):
            nn.init.normal_(layer.weight, std=std)
        for layer in (self.cls_score, self.bbox_pred):
            nn.init.constant_(layer.bias, 0)

    def forward(self, x):
        """
        Return ``(scores, bbox_deltas)``.

        A 4-d input must have trailing spatial size 1x1 and is flattened to
        2-d first; other inputs are passed to the linear layers as they are.
        """
        if x.ndimension() == 4:
            assert tuple(x.shape[2:]) == (1, 1)
            x = x.view(x.size(0), -1)
        return self.cls_score(x), self.bbox_pred(x)
58 | 
59 | 
def make_roi_box_predictor(cfg, in_channels):
    """
    Build the box predictor registered under
    ``cfg.MODEL.ROI_BOX_HEAD.PREDICTOR``, calling it with ``(cfg, in_channels)``.
    """
    predictor_name = cfg.MODEL.ROI_BOX_HEAD.PREDICTOR
    predictor_cls = registry.ROI_BOX_PREDICTOR[predictor_name]
    return predictor_cls(cfg, in_channels)
63 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor
 4 | from .roi_keypoint_predictors import make_roi_keypoint_predictor
 5 | from .inference import make_roi_keypoint_post_processor
 6 | from .loss import make_roi_keypoint_loss_evaluator
 7 | 
 8 | 
 9 | class ROIKeypointHead(torch.nn.Module):
10 |     def __init__(self, cfg, in_channels):
11 |         super(ROIKeypointHead, self).__init__()
12 |         self.cfg = cfg.clone()
13 |         self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels)
14 |         self.predictor = make_roi_keypoint_predictor(
15 |             cfg, self.feature_extractor.out_channels)
16 |         self.post_processor = make_roi_keypoint_post_processor(cfg)
17 |         self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg)
18 | 
19 |     def forward(self, features, proposals, targets=None):
20 |         """
21 |         Arguments:
22 |             features (list[Tensor]): feature-maps from possibly several levels
23 |             proposals (list[BoxList]): proposal boxes
24 |             targets (list[BoxList], optional): the ground-truth targets.
25 | 
26 |         Returns:
27 |             x (Tensor): the result of the feature extractor
28 |             proposals (list[BoxList]): during training, the original proposals
29 |                 are returned. During testing, the predicted boxlists are returned
30 |                 with the `mask` field set
31 |             losses (dict[Tensor]): During training, returns the losses for the
32 |                 head. During testing, returns an empty dict.
33 |         """
34 |         if self.training:
35 |             with torch.no_grad():
36 |                 proposals = self.loss_evaluator.subsample(proposals, targets)
37 | 
38 |         x = self.feature_extractor(features, proposals)
39 |         kp_logits = self.predictor(x)
40 | 
41 |         if not self.training:
42 |             result = self.post_processor(kp_logits, proposals)
43 |             return x, result, {}
44 | 
45 |         loss_kp = self.loss_evaluator(proposals, kp_logits)
46 | 
47 |         return x, proposals, dict(loss_kp=loss_kp)
48 | 
49 | 
def build_roi_keypoint_head(cfg, in_channels):
    """Create a ``ROIKeypointHead`` from ``cfg`` for ``in_channels`` input channels."""
    head = ROIKeypointHead(cfg, in_channels)
    return head
52 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py:
--------------------------------------------------------------------------------
 1 | from torch import nn
 2 | from torch.nn import functional as F
 3 | 
 4 | from maskrcnn_benchmark.modeling import registry
 5 | from maskrcnn_benchmark.modeling.poolers import Pooler
 6 | 
 7 | from maskrcnn_benchmark.layers import Conv2d
 8 | 
 9 | 
@registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor")
class KeypointRCNNFeatureExtractor(nn.Module):
    """
    Keypoint feature extractor: a ``Pooler`` followed by a stack of 3x3
    convolutions, each followed by ReLU.

    The stack is described by ``cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS``
    (one output width per layer). ``out_channels`` exposes the width of the
    returned features.
    """

    def __init__(self, cfg, in_channels):
        super(KeypointRCNNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS
        next_feature = in_channels
        # Layers are registered as conv_fcn1, conv_fcn2, ...; ``blocks``
        # keeps their names in application order.
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "conv_fcn{}".format(layer_idx)
            module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1)
            nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
            nn.init.constant_(module.bias, 0)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        # ``next_feature`` equals the last layer's width, and falls back to
        # ``in_channels`` when CONV_LAYERS is empty (the loop variable used
        # previously was undefined in that case and raised NameError).
        self.out_channels = next_feature

    def forward(self, x, proposals):
        """Pool ``x`` over ``proposals`` and apply each conv layer with ReLU."""
        x = self.pooler(x, proposals)
        for layer_name in self.blocks:
            x = F.relu(getattr(self, layer_name)(x))
        return x
44 | 
45 | 
def make_roi_keypoint_feature_extractor(cfg, in_channels):
    """
    Build the keypoint feature extractor registered under
    ``cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR``, calling it with
    ``(cfg, in_channels)``.
    """
    extractor_name = cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR
    extractor_cls = registry.ROI_KEYPOINT_FEATURE_EXTRACTORS[extractor_name]
    return extractor_cls(cfg, in_channels)
51 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py:
--------------------------------------------------------------------------------
 1 | from torch import nn
 2 | 
 3 | from maskrcnn_benchmark import layers
 4 | from maskrcnn_benchmark.modeling import registry
 5 | 
 6 | 
 7 | @registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor")
 8 | class KeypointRCNNPredictor(nn.Module):
 9 |     def __init__(self, cfg, in_channels):
10 |         super(KeypointRCNNPredictor, self).__init__()
11 |         input_features = in_channels
12 |         num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES
13 |         deconv_kernel = 4
14 |         self.kps_score_lowres = layers.ConvTranspose2d(
15 |             input_features,
16 |             num_keypoints,
17 |             deconv_kernel,
18 |             stride=2,
19 |             padding=deconv_kernel // 2 - 1,
20 |         )
21 |         nn.init.kaiming_normal_(
22 |             self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu"
23 |         )
24 |         nn.init.constant_(self.kps_score_lowres.bias, 0)
25 |         self.up_scale = 2
26 |         self.out_channels = num_keypoints
27 | 
28 |     def forward(self, x):
29 |         x = self.kps_score_lowres(x)
30 |         x = layers.interpolate(
31 |             x, scale_factor=self.up_scale, mode="bilinear", align_corners=False
32 |         )
33 |         return x
34 | 
35 | 
def make_roi_keypoint_predictor(cfg, in_channels):
    """
    Build the keypoint predictor registered under
    ``cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR``, calling it with
    ``(cfg, in_channels)``.
    """
    predictor_name = cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR
    predictor_cls = registry.ROI_KEYPOINT_PREDICTOR[predictor_name]
    return predictor_cls(cfg, in_channels)
39 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from torch import nn
 3 | from torch.nn import functional as F
 4 | 
 5 | from maskrcnn_benchmark.layers import Conv2d
 6 | from maskrcnn_benchmark.layers import ConvTranspose2d
 7 | from maskrcnn_benchmark.modeling import registry
 8 | 
 9 | 
@registry.ROI_MASK_PREDICTOR.register("MaskRCNNC4Predictor")
class MaskRCNNC4Predictor(nn.Module):
    """
    Mask predictor: a transposed convolution with ReLU followed by a 1x1
    convolution producing one channel per class.
    """

    def __init__(self, cfg, in_channels):
        super(MaskRCNNC4Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        # Width of the intermediate features: last entry of the mask head's
        # conv layer list.
        dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        self.conv5_mask = ConvTranspose2d(in_channels, dim_reduced, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

        for param_name, param in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(param, 0)
                continue
            if "weight" in param_name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")

    def forward(self, x):
        """Return the per-class mask logits for ``x``."""
        upsampled = F.relu(self.conv5_mask(x))
        return self.mask_fcn_logits(upsampled)
32 | 
33 | 
@registry.ROI_MASK_PREDICTOR.register("MaskRCNNConv1x1Predictor")
class MaskRCNNConv1x1Predictor(nn.Module):
    """
    Mask predictor consisting of a single 1x1 convolution producing one
    channel per class.
    """

    def __init__(self, cfg, in_channels):
        super(MaskRCNNConv1x1Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES

        self.mask_fcn_logits = Conv2d(in_channels, num_classes, 1, 1, 0)

        for param_name, param in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(param, 0)
                continue
            if "weight" in param_name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")

    def forward(self, x):
        """Return the per-class mask logits for ``x``."""
        logits = self.mask_fcn_logits(x)
        return logits
53 | 
54 | 
def make_roi_mask_predictor(cfg, in_channels):
    """
    Build the mask predictor registered under
    ``cfg.MODEL.ROI_MASK_HEAD.PREDICTOR``, calling it with
    ``(cfg, in_channels)``.
    """
    predictor_name = cfg.MODEL.ROI_MASK_HEAD.PREDICTOR
    predictor_cls = registry.ROI_MASK_PREDICTOR[predictor_name]
    return predictor_cls(cfg, in_channels)
58 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/rpn/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | # from .rpn import build_rpn
3 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | """
 3 | Miscellaneous utility functions
 4 | """
 5 | 
 6 | import torch
 7 | 
 8 | 
 9 | def cat(tensors, dim=0):
10 |     """
11 |     Efficient version of torch.cat that avoids a copy if there is only a single element in a list
12 |     """
13 |     assert isinstance(tensors, (list, tuple))
14 |     if len(tensors) == 1:
15 |         return tensors[0]
16 |     return torch.cat(tensors, dim)
17 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/semi/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/maskrcnn_benchmark/semi/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/solver/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .build import make_optimizer
3 | from .build import make_lr_scheduler
4 | from .lr_scheduler import WarmupMultiStepLR
5 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/solver/build.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | 
 4 | from .lr_scheduler import WarmupMultiStepLR
 5 | 
 6 | 
 7 | def make_optimizer(cfg, model):
 8 |     params = []
 9 |     for key, value in model.named_parameters():
10 |         if not value.requires_grad:
11 |             continue
12 |         lr = cfg.SOLVER.BASE_LR
13 |         weight_decay = cfg.SOLVER.WEIGHT_DECAY
14 |         if "bias" in key:
15 |             lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR
16 |             weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
17 |         params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}]
18 | 
19 |     optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM)
20 |     return optimizer
21 | 
22 | 
def make_lr_scheduler(cfg, optimizer):
    """
    Wrap ``optimizer`` in a ``WarmupMultiStepLR`` configured from
    ``cfg.SOLVER`` (STEPS, GAMMA and the three WARMUP_* options).
    """
    solver = cfg.SOLVER
    warmup_options = {
        "warmup_factor": solver.WARMUP_FACTOR,
        "warmup_iters": solver.WARMUP_ITERS,
        "warmup_method": solver.WARMUP_METHOD,
    }
    return WarmupMultiStepLR(optimizer, solver.STEPS, solver.GAMMA, **warmup_options)
32 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/solver/lr_scheduler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from bisect import bisect_right
 3 | 
 4 | import torch
 5 | 
 6 | 
 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler,
 8 | # separating MultiStepLR with WarmupLR
 9 | # but the current LRScheduler design doesn't allow it
class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    """
    Multi-step learning-rate schedule with a warmup phase.

    While ``last_epoch < warmup_iters`` the base rate is scaled by a warmup
    factor: the constant ``warmup_factor`` for ``"constant"`` warmup, or a
    linear ramp from ``warmup_factor`` up to 1 for ``"linear"`` warmup.
    Independently, the rate is multiplied by ``gamma`` once for every
    milestone that is ``<= last_epoch``.

    Arguments:
        optimizer: wrapped optimizer
        milestones: sorted (increasing) list of step indices
        gamma (float): multiplicative decay applied at each milestone
        warmup_factor (float): scale used at the start of warmup
        warmup_iters (int): number of warmup steps
        warmup_method (str): ``"constant"`` or ``"linear"``
        last_epoch (int): passed to the base scheduler

    Raises:
        ValueError: if ``milestones`` is not sorted or ``warmup_method`` is
            not one of the supported values.
    """

    def __init__(
        self,
        optimizer,
        milestones,
        gamma=0.1,
        warmup_factor=1.0 / 3,
        warmup_iters=500,
        warmup_method="linear",
        last_epoch=-1,
    ):
        if not list(milestones) == sorted(milestones):
            # Format the message; previously the template and the value
            # were passed as two separate exception arguments.
            raise ValueError(
                "Milestones should be a list of increasing integers. "
                "Got {}".format(milestones)
            )

        if warmup_method not in ("constant", "linear"):
            raise ValueError(
                "Only 'constant' or 'linear' warmup_method accepted, "
                "got {}".format(warmup_method)
            )
        # These attributes must exist before the base constructor runs.
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the learning rate of every param group for ``last_epoch``."""
        warmup_factor = 1
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == "constant":
                warmup_factor = self.warmup_factor
            elif self.warmup_method == "linear":
                # Interpolate from warmup_factor (alpha = 0) towards 1.
                alpha = float(self.last_epoch) / self.warmup_iters
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
        # bisect_right counts the milestones already reached.
        return [
            base_lr
            * warmup_factor
            * self.gamma ** bisect_right(self.milestones, self.last_epoch)
            for base_lr in self.base_lrs
        ]
53 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/structures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/maskrcnn_benchmark/structures/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/structures/image_list.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from __future__ import division
 3 | 
 4 | import torch
 5 | 
 6 | 
 7 | class ImageList(object):
 8 |     """
 9 |     Structure that holds a list of images (of possibly
10 |     varying sizes) as a single tensor.
11 |     This works by padding the images to the same size,
12 |     and storing in a field the original sizes of each image
13 |     """
14 | 
15 |     def __init__(self, tensors, image_sizes):
16 |         """
17 |         Arguments:
18 |             tensors (tensor)
19 |             image_sizes (list[tuple[int, int]])
20 |         """
21 |         self.tensors = tensors
22 |         self.image_sizes = image_sizes
23 | 
24 |     def to(self, *args, **kwargs):
25 |         cast_tensor = self.tensors.to(*args, **kwargs)
26 |         return ImageList(cast_tensor, self.image_sizes)
27 | 
28 | 
def to_image_list(tensors, size_divisible=0):
    """
    Convert ``tensors`` into an ImageList.

    ``tensors`` may be an ImageList (returned unchanged), a single
    torch.Tensor, or a tuple/list of Tensors; numpy arrays are not
    accepted. For a tuple/list the Tensors are copied into one
    zero-initialised batch tensor large enough to hold all of them.
    When ``size_divisible`` is positive, dimensions 1 and 2 of that
    batch entry shape are rounded up to a multiple of it.

    Raises:
        TypeError: if ``tensors`` is none of the supported types.
    """
    # With a positive size_divisible a lone Tensor goes through the padding path.
    if isinstance(tensors, torch.Tensor) and size_divisible > 0:
        tensors = [tensors]

    if isinstance(tensors, ImageList):
        return tensors

    if isinstance(tensors, torch.Tensor):
        # The sizes can be read directly from a single tensor.
        if tensors.dim() == 3:
            tensors = tensors[None]
        assert tensors.dim() == 4
        return ImageList(tensors, [entry.shape[-2:] for entry in tensors])

    if not isinstance(tensors, (tuple, list)):
        raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors)))

    # Largest extent along every dimension across all the inputs.
    dims = [max(extents) for extents in zip(*[img.shape for img in tensors])]

    # TODO Ideally, just remove this and let the model handle arbitrary
    # input sizes
    if size_divisible > 0:
        import math

        for axis in (1, 2):
            dims[axis] = int(math.ceil(dims[axis] / size_divisible) * size_divisible)

    batch_shape = (len(tensors),) + tuple(dims)
    batched_imgs = tensors[0].new(*batch_shape).zero_()
    for source, slot in zip(tensors, batched_imgs):
        # Copy each image into the leading corner; the rest stays zero.
        slot[: source.shape[0], : source.shape[1], : source.shape[2]].copy_(source)

    return ImageList(batched_imgs, [im.shape[-2:] for im in tensors])
73 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/README.md:
--------------------------------------------------------------------------------
1 | # Utility functions
2 | 
3 | This folder contains utility functions that are not used in the
4 | core library, but are useful for building models or training
5 | code using the config system.
6 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/maskrcnn_benchmark/utils/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/collect_env.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import PIL
 3 | 
 4 | from torch.utils.collect_env import get_pretty_env_info
 5 | 
 6 | 
 7 | def get_pil_version():
 8 |     return "\n        Pillow ({})".format(PIL.__version__)
 9 | 
10 | 
def collect_env_info():
    """Return torch's pretty environment report followed by the Pillow version line."""
    return get_pretty_env_info() + get_pil_version()
15 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/cv2_util.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Module for cv2 utility functions and maintaining version compatibility
 3 | between 3.x and 4.x
 4 | """
 5 | import cv2
 6 | 
 7 | 
 8 | def findContours(*args, **kwargs):
 9 |     """
10 |     Wraps cv2.findContours to maintain compatiblity between versions
11 |     3 and 4
12 | 
13 |     Returns:
14 |         contours, hierarchy
15 |     """
16 |     if cv2.__version__.startswith('4'):
17 |         contours, hierarchy = cv2.findContours(*args, **kwargs)
18 |     elif cv2.__version__.startswith('3'):
19 |         _, contours, hierarchy = cv2.findContours(*args, **kwargs)
20 |     else:
21 |         raise AssertionError(
22 |             'cv2 must be either version 3 or 4 to call this method')
23 | 
24 |     return contours, hierarchy
25 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/env.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import os
 3 | 
 4 | from maskrcnn_benchmark.utils.imports import import_file
 5 | 
 6 | 
 7 | def setup_environment():
 8 |     """Perform environment setup work. The default setup is a no-op, but this
 9 |     function allows the user to specify a Python source file that performs
10 |     custom setup work that may be necessary to their computing environment.
11 |     """
12 |     custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE")
13 |     if custom_module_path:
14 |         setup_custom_environment(custom_module_path)
15 |     else:
16 |         # The default setup is a no-op
17 |         pass
18 | 
19 | 
def setup_custom_environment(custom_module_path):
    """Import the Python source file at ``custom_module_path`` and call its
    ``setup_environment`` function.

    The file is loaded under the module name
    "maskrcnn_benchmark.utils.env.custom_module". An assertion checks that
    the loaded module exposes a callable ``setup_environment`` attribute.
    """
    module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path)
    has_callable_hook = hasattr(module, "setup_environment") and callable(
        module.setup_environment
    )
    assert has_callable_hook, (
        "Custom environment module defined in {} does not have the "
        "required callable attribute 'setup_environment'."
    ).format(custom_module_path)
    module.setup_environment()
34 | 
35 | 
# Force environment setup when this module is imported: the call below runs at
# import time, so a custom setup file named by the TORCH_DETECTRON_ENV_MODULE
# environment variable is loaded and executed as a side effect of the import.
setup_environment()
38 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/imports.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | 
 4 | if torch._six.PY3:
 5 |     import importlib
 6 |     import importlib.util
 7 |     import sys
 8 | 
 9 | 
10 |     # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
11 |     def import_file(module_name, file_path, make_importable=False):
12 |         spec = importlib.util.spec_from_file_location(module_name, file_path)
13 |         module = importlib.util.module_from_spec(spec)
14 |         spec.loader.exec_module(module)
15 |         if make_importable:
16 |             sys.modules[module_name] = module
17 |         return module
18 | else:
19 |     import imp
20 | 
21 |     def import_file(module_name, file_path, make_importable=None):
22 |         module = imp.load_source(module_name, file_path)
23 |         return module
24 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/logger.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import logging
 3 | import os
 4 | import sys
 5 | 
 6 | 
 7 | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"):
 8 |     logger = logging.getLogger(name)
 9 |     logger.setLevel(logging.DEBUG)
10 |     # don't log results for the non-master process
11 |     if distributed_rank > 0:
12 |         return logger
13 |     ch = logging.StreamHandler(stream=sys.stdout)
14 |     ch.setLevel(logging.DEBUG)
15 |     formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s")
16 |     ch.setFormatter(formatter)
17 |     logger.addHandler(ch)
18 | 
19 |     if save_dir:
20 |         fh = logging.FileHandler(os.path.join(save_dir, filename))
21 |         fh.setLevel(logging.DEBUG)
22 |         fh.setFormatter(formatter)
23 |         logger.addHandler(fh)
24 | 
25 |     return logger
26 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/metric_logger.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from collections import defaultdict
 3 | from collections import deque
 4 | 
 5 | import torch
 6 | 
 7 | 
 8 | class SmoothedValue(object):
 9 |     """Track a series of values and provide access to smoothed values over a
10 |     window or the global series average.
11 |     """
12 | 
13 |     def __init__(self, window_size=20):
14 |         self.deque = deque(maxlen=window_size)
15 |         self.series = []
16 |         self.total = 0.0
17 |         self.count = 0
18 | 
19 |     def update(self, value):
20 |         self.deque.append(value)
21 |         self.series.append(value)
22 |         self.count += 1
23 |         self.total += value
24 | 
25 |     @property
26 |     def median(self):
27 |         d = torch.tensor(list(self.deque))
28 |         return d.median().item()
29 | 
30 |     @property
31 |     def avg(self):
32 |         d = torch.tensor(list(self.deque))
33 |         return d.mean().item()
34 | 
35 |     @property
36 |     def global_avg(self):
37 |         return self.total / self.count
38 | 
39 | 
class MetricLogger(object):
    """Collection of named SmoothedValue meters.

    Meters are created on first use by ``update`` and can be read back as
    attributes (``logger.loss``). ``str(logger)`` renders every meter as
    ``name: median (global_avg)`` joined by ``delimiter``.
    """

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed each keyword value into the meter of the same name.

        Tensor values are converted with ``.item()``; the value must then be
        a float or an int.
        """
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        """Resolve ``attr`` as a meter name once normal lookup has failed.

        Raises:
            AttributeError: if ``attr`` is neither a meter nor an instance
                attribute.
        """
        # Read "meters" through __dict__: going through self.meters would
        # re-enter __getattr__ and recurse without bound whenever the
        # attribute has not been set yet (e.g. on an instance created with
        # __new__, before __init__ has run).
        meters = self.__dict__.get("meters")
        if meters is not None and attr in meters:
            return meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
                    type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg)
            )
        return self.delimiter.join(loss_str)
67 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/miscellaneous.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import errno
 3 | import json
 4 | import logging
 5 | import os
 6 | from .comm import is_main_process
 7 | 
 8 | 
 9 | def mkdir(path):
10 |     try:
11 |         os.makedirs(path)
12 |     except OSError as e:
13 |         if e.errno != errno.EEXIST:
14 |             raise
15 | 
16 | 
def save_labels(dataset_list, output_dir):
    """Write the merged ``categories`` of the datasets to labels.json.

    Nothing is done unless ``is_main_process()`` is true. The ``categories``
    mapping of every dataset that has one is merged into a single dict; a
    warning is logged for each dataset without that attribute. When the
    merged dict is non-empty it is dumped as JSON to
    ``output_dir/labels.json``.
    """
    if not is_main_process():
        return

    logger = logging.getLogger(__name__)

    ids_to_labels = {}
    for dataset in dataset_list:
        if not hasattr(dataset, 'categories'):
            logger.warning("Dataset [{}] has no categories attribute, labels.json file won't be created".format(
                dataset.__class__.__name__))
            continue
        ids_to_labels.update(dataset.categories)

    if not ids_to_labels:
        return

    labels_file = os.path.join(output_dir, 'labels.json')
    logger.info("Saving labels mapping into {}".format(labels_file))
    with open(labels_file, 'w') as f:
        json.dump(ids_to_labels, f, indent=2)
34 | 
35 | 
def save_config(cfg, path):
    """Write ``cfg.dump()`` to the file at ``path``; only done when
    ``is_main_process()`` is true.
    """
    if not is_main_process():
        return
    with open(path, 'w') as f:
        f.write(cfg.dump())
40 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | 
 4 | def _register_generic(module_dict, module_name, module):
 5 |     assert module_name not in module_dict
 6 |     module_dict[module_name] = module
 7 | 
 8 | 
 9 | class Registry(dict):
10 |     '''
11 |     A helper class for managing registering modules, it extends a dictionary
12 |     and provides a register functions.
13 | 
14 |     Eg. creeting a registry:
15 |         some_registry = Registry({"default": default_module})
16 | 
17 |     There're two ways of registering new modules:
18 |     1): normal way is just calling register function:
19 |         def foo():
20 |             ...
21 |         some_registry.register("foo_module", foo)
22 |     2): used as decorator when declaring the module:
23 |         @some_registry.register("foo_module")
24 |         @some_registry.register("foo_modeul_nickname")
25 |         def foo():
26 |             ...
27 | 
28 |     Access of module is just like using a dictionary, eg:
29 |         f = some_registry["foo_modeul"]
30 |     '''
31 |     def __init__(self, *args, **kwargs):
32 |         super(Registry, self).__init__(*args, **kwargs)
33 | 
34 |     def register(self, module_name, module=None):
35 |         # used as function call
36 |         if module is not None:
37 |             _register_generic(self, module_name, module)
38 |             return
39 | 
40 |         # used as decorator
41 |         def register_fn(fn):
42 |             _register_generic(self, module_name, fn)
43 |             return fn
44 | 
45 |         return register_fn
46 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/timer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | 
 4 | import time
 5 | import datetime
 6 | 
 7 | 
 8 | class Timer(object):
 9 |     def __init__(self):
10 |         self.reset()
11 | 
12 |     @property
13 |     def average_time(self):
14 |         return self.total_time / self.calls if self.calls > 0 else 0.0
15 | 
16 |     def tic(self):
17 |         # using time.time instead of time.clock because time time.clock
18 |         # does not normalize for multithreading
19 |         self.start_time = time.time()
20 | 
21 |     def toc(self, average=True):
22 |         self.add(time.time() - self.start_time)
23 |         if average:
24 |             return self.average_time
25 |         else:
26 |             return self.diff
27 | 
28 |     def add(self, time_diff):
29 |         self.diff = time_diff
30 |         self.total_time += self.diff
31 |         self.calls += 1
32 | 
33 |     def reset(self):
34 |         self.total_time = 0.0
35 |         self.calls = 0
36 |         self.start_time = 0.0
37 |         self.diff = 0.0
38 | 
39 |     def avg_time_str(self):
40 |         time_str = str(datetime.timedelta(seconds=self.average_time))
41 |         return time_str
42 | 
43 | 
def get_time_str(time_diff):
    """Format ``time_diff`` seconds as a ``datetime.timedelta`` string."""
    return str(datetime.timedelta(seconds=time_diff))
47 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ninja
2 | yacs
3 | cython
4 | matplotlib
5 | tqdm
6 | 


--------------------------------------------------------------------------------
/semi_test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShouyangDong/tse-t/1582a01f799d366f4b7136e37534dbabbf29b0de/semi_test/__init__.py


--------------------------------------------------------------------------------
/semi_test/disk_clear.py:
--------------------------------------------------------------------------------
 1 | import glob
 2 | import os
 3 | import numpy as np
 4 | import shutil
 5 | import torch
 6 | from tqdm import tqdm
 7 | 
 8 | keep_last = 5
 9 | filter_int = 390000
10 | filter_int2 = 180000
11 | 
12 | str_ens_folder = glob.glob('./model_path/*')
13 | pt_count = []
14 | for i,str_folder in enumerate(str_ens_folder):
15 |     # if i < 105834:
16 |     #     continue
17 |     print(i)
18 |     pts = glob.glob(os.path.join(str_folder,'*.pt'))
19 |     pt_count.append(len(pts))
20 |     #[torch.load(_pt) for _pt in pts]
21 |     if len(pts) == 0:
22 |         continue
23 |     if len(pts) < keep_last:
24 |         continue
25 | 
26 |     # pts_iter = [int(_iter.split('_x')[-1].replace('.pt','')) for _iter in pts]
27 |     # idx_sorted = np.argsort(pts_iter)
28 |     # pts_sorted = np.array(pts)[idx_sorted][:len(pts) - keep_last]
29 |     # [os.remove(_file) for _file in pts_sorted]
30 |     #-----------------filter by range
31 |     pts_iter = [int(_iter.split('_x')[-1].replace('.pt','')) for _iter in pts]
32 |     sel_ind = np.asarray(pts_iter)>filter_int
33 |     
34 |     pts_sel = np.asarray(pts)[sel_ind]
35 |     [os.remove(_file) for _file in pts_sel]
36 | 
37 | print(np.amax(pt_count),np.amin(pt_count))
38 | 


--------------------------------------------------------------------------------
/semi_test/e2e_semi.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 300
 7 |     ANCHOR_SIZES: (128, 256, 512)
 8 |   ROI_BOX_HEAD:
 9 |     NUM_CLASSES: 21
10 | DATASETS:
11 |   TRAIN: ("voc_2007_train", "voc_2007_val","voc_2012_train","voc_2012_val")
12 |   TEST: ("voc_2007_test","voc_2012_train","voc_2012_val")
13 | SOLVER:
14 |   BASE_LR: 0.004
15 |   WEIGHT_DECAY: 0.0001
16 |   STEPS: (12500, )
17 |   MAX_ITER: 91750000
18 |   IMS_PER_BATCH: 4
19 |   CHECKPOINT_PERIOD: 2000
20 | INPUT: 
21 |   SEMI: True
22 | TEST:
23 |   IMS_PER_BATCH: 4
24 | DATALOADER:
25 |   NUM_WORKERS: 16  
26 | SEMI:
27 |   EMA_DECAY: 0.99
28 |   TEMPORAL_NUM: 10
29 |   TEMPORAL_SAVE_PATH: /model
30 |   FG_IOU_THRESHOLD: 0.6
31 |   BG_IOU_THRESHOLD: 0.1
32 |   SEMI_WEIGHT: 0.4
33 | 
34 |   
35 | OUTPUT_DIR: ./output_dir/
36 |   
37 | 


--------------------------------------------------------------------------------
/semi_test/retina_voc.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     ANCHOR_SIZES: (128, 256, 512)
16 |     PRE_NMS_TOP_N_TRAIN: 2000
17 |     PRE_NMS_TOP_N_TEST: 1000
18 |     POST_NMS_TOP_N_TEST: 1000
19 |     FPN_POST_NMS_TOP_N_TEST: 1000
20 |   ROI_HEADS:
21 |     USE_FPN: True
22 |     BATCH_SIZE_PER_IMAGE: 256
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 7
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     POOLER_SAMPLING_RATIO: 2
27 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
28 |     PREDICTOR: "FPNPredictor"
29 |   RETINANET:
30 |     SCALES_PER_OCTAVE: 3
31 |     STRADDLE_THRESH: -1
32 |     FG_IOU_THRESHOLD: 0.5
33 |     BG_IOU_THRESHOLD: 0.4
34 |     NUM_CLASSES: 21
35 | 
36 | DATASETS:
37 |   TRAIN: ("voc_2007_train", "voc_2007_val")
38 |   TEST: ("voc_2007_test",)
39 | INPUT:
40 |   MIN_SIZE_TRAIN: (800,)
41 |   MAX_SIZE_TRAIN: 1333
42 |   MIN_SIZE_TEST: 800
43 |   MAX_SIZE_TEST: 1333
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 |   # Assume 4 gpus
48 |   BASE_LR: 0.005
49 |   WEIGHT_DECAY: 0.0001
50 |   STEPS: (120000, 160000)
51 |   MAX_ITER: 180000
52 |   IMS_PER_BATCH: 8
53 | OUTPUT_DIR: ./exp_retina_07/
54 | 


--------------------------------------------------------------------------------
/semi_test/retina_voc_paper.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     ANCHOR_SIZES: (128, 256, 512)
16 |     PRE_NMS_TOP_N_TRAIN: 2000
17 |     PRE_NMS_TOP_N_TEST: 1000
18 |     POST_NMS_TOP_N_TEST: 1000
19 |     FPN_POST_NMS_TOP_N_TEST: 1000
20 |   ROI_HEADS:
21 |     USE_FPN: True
22 |     BATCH_SIZE_PER_IMAGE: 256
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 7
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     POOLER_SAMPLING_RATIO: 2
27 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
28 |     PREDICTOR: "FPNPredictor"
29 |   RETINANET:
30 |     SCALES_PER_OCTAVE: 3
31 |     STRADDLE_THRESH: -1
32 |     FG_IOU_THRESHOLD: 0.5
33 |     BG_IOU_THRESHOLD: 0.4
34 |     NUM_CLASSES: 21
35 | 
36 | DATASETS:
37 |   TRAIN: ("voc_2007_train", "voc_2007_val","voc_2012_train","voc_2012_val",)
38 |   TEST: ("voc_2007_test",)
39 | INPUT:
40 |   MIN_SIZE_TRAIN: (800,)
41 |   MAX_SIZE_TRAIN: 1333
42 |   MIN_SIZE_TEST: 800
43 |   MAX_SIZE_TEST: 1333
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 |   # Assume 4 gpus
48 |   BASE_LR: 0.005
49 |   WEIGHT_DECAY: 0.0001
50 |   STEPS: (120000, 160000)
51 |   MAX_ITER: 180000
52 |   IMS_PER_BATCH: 8
53 | SEMI:
54 |   EMA_DECAY: 0.99
55 |   TEMPORAL_NUM: 10
56 |   TEMPORAL_SAVE_PATH: ./model_folder
57 |   FG_IOU_THRESHOLD: 0.6
58 |   BG_IOU_THRESHOLD: 0.1
59 |   SEMI_WEIGHT: 10.
60 | 
61 | OUTPUT_DIR: ./semi_base_ensempble/
62 | 


--------------------------------------------------------------------------------
/semi_test/retina_voc_semi.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     ANCHOR_SIZES: (128, 256, 512)
16 |     PRE_NMS_TOP_N_TRAIN: 2000
17 |     PRE_NMS_TOP_N_TEST: 1000
18 |     POST_NMS_TOP_N_TEST: 1000
19 |     FPN_POST_NMS_TOP_N_TEST: 1000
20 |   ROI_HEADS:
21 |     USE_FPN: True
22 |     BATCH_SIZE_PER_IMAGE: 256
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 7
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     POOLER_SAMPLING_RATIO: 2
27 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
28 |     PREDICTOR: "FPNPredictor"
29 |   RETINANET:
30 |     SCALES_PER_OCTAVE: 3
31 |     STRADDLE_THRESH: -1
32 |     FG_IOU_THRESHOLD: 0.5
33 |     BG_IOU_THRESHOLD: 0.4
34 |     NUM_CLASSES: 21
35 | 
36 | DATASETS:
37 |   TRAIN: ("voc_2007_train", "voc_2007_val","voc_2012_train","voc_2012_val","voc_2012_test")
38 |   TEST: ("voc_2007_test",)
39 | INPUT:
40 |   MIN_SIZE_TRAIN: (800,)
41 |   MAX_SIZE_TRAIN: 1333
42 |   MIN_SIZE_TEST: 800
43 |   MAX_SIZE_TEST: 1333
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 |   # Assume 4 gpus
48 |   BASE_LR: 0.005
49 |   WEIGHT_DECAY: 0.0001
50 |   STEPS: (120000, 160000)
51 |   MAX_ITER: 180000
52 |   IMS_PER_BATCH: 8
53 | SEMI:
54 |   EMA_DECAY: 0.99
55 |   TEMPORAL_NUM: 10
56 |   TEMPORAL_SAVE_PATH: ./tempor_pred_07_12/
57 |   FG_IOU_THRESHOLD: 0.6
58 |   BG_IOU_THRESHOLD: 0.1
59 |   SEMI_WEIGHT: 10.
60 | 
61 | OUTPUT_DIR: ./semi_12test
62 | 


--------------------------------------------------------------------------------
/semi_test/retinanet_R-101-FPN_1.5x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-101-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (800, )
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assume 4 gpus
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (180000, 240000)
48 |   MAX_ITER: 270000
49 |   IMS_PER_BATCH: 8
50 | 
51 | 
52 | 
53 | OUTPUT_DIR: ./exp_retina_101-FPN1.5x/


--------------------------------------------------------------------------------
/semi_test/retinanet_R-101-FPN_1.5x_semi.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-101-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (800, )
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assume 4 gpus
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (180000, 240000)
48 |   MAX_ITER: 270000
49 |   IMS_PER_BATCH: 8
50 | 
51 | SEMI:
52 |   EMA_DECAY: 0.99
53 |   TEMPORAL_NUM: 5
54 |   TEMPORAL_SAVE_PATH: /tempor_coco_R101_1.5x_semi/
55 |   FG_IOU_THRESHOLD: 0.6
56 |   BG_IOU_THRESHOLD: 0.1
57 |   SEMI_WEIGHT: 10.
58 |   SEMI_STEP: -1
59 |   HYPER_PARAMETERS:
60 |     ENS_THRE: 0.15
61 |     EPOCH_BATCH_NUM: 14786
62 |     START_ITER: 3
63 |     RAMPUP_LENGTH: 13
64 |     MAX_CONSISTENT_LOSS: 0.08
65 |     CONSISTENCE_WEIGHT: 0.1
66 |     LABELED_DATABASE: [1,]
67 |     REG_CONSIST_WEIGHT: 16
68 |     NCLS: 80
69 | 
70 | 
71 | OUTPUT_DIR: ./exp_retina_101-FPN1.5x/


--------------------------------------------------------------------------------
/semi_test/retinanet_R-101-FPN_1x_unlabeled.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-101-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival","coco_2014_unlabeled",) #("coco_2014_train", "coco_2014_valminusminival")
35 |   TEST: ("coco_2014_minival",) #("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (780,800,810,820,830,)
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assume 4 gpus
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (120000, 160000)
48 |   MAX_ITER: 880000
49 |   IMS_PER_BATCH: 8
50 | 
51 | SEMI:
52 |   EMA_DECAY: 0.9997
53 |   TEMPORAL_NUM: 10
54 |   TEMPORAL_SAVE_PATH: /tempor_pred_coco_unlabeled_r101/
55 |   FG_IOU_THRESHOLD: 0.6
56 |   BG_IOU_THRESHOLD: 0.1
57 |   SEMI_WEIGHT: 10.
58 |   SEMI_STEP: -1
59 |   SEL_THR: 0.1
60 |   HYPER_PARAMETERS:
61 |     ENS_THRE: 0.05
62 |     EPOCH_BATCH_NUM: 27210
63 |     START_ITER: 3
64 |     RAMPUP_LENGTH: 13
65 |     MAX_CONSISTENT_LOSS: 0.05
66 |     CONSISTENCE_WEIGHT: 0.5
67 |     LABELED_DATABASE: [0,1,]
68 | 
69 | 
70 | OUTPUT_DIR: ./exp_R-101-FPN_1x_unlabeled/  


--------------------------------------------------------------------------------
/semi_test/retinanet_R-50-FPN_1x_coco_unlabeled.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train","coco_2014_valminusminival","coco_2014_unlabeled",)
35 |   TEST: ("coco_2014_unlabeled",)
36 |   #("coco_2014_minival",)
37 | INPUT:
38 |   MIN_SIZE_TRAIN: (800,)
39 |   MAX_SIZE_TRAIN: 1333
40 |   MIN_SIZE_TEST: 800
41 |   MAX_SIZE_TEST: 1333
42 | DATALOADER:
43 |   SIZE_DIVISIBILITY: 32
44 |   NUM_WORKERS: 4
45 | SOLVER:
46 |   # Assume 4 gpus
47 |   BASE_LR: 0.002
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (40000, 90000)
50 |   MAX_ITER: 980000
51 |   IMS_PER_BATCH: 8
52 |   
53 | SEMI:
54 |   EMA_DECAY: 0.99
55 |   TEMPORAL_NUM: 10
56 |   TEMPORAL_SAVE_PATH: /tempor_pred_coco_unlabeled/
57 |   FG_IOU_THRESHOLD: 0.6
58 |   BG_IOU_THRESHOLD: 0.1
59 |   SEMI_WEIGHT: 10.
60 |   SEMI_STEP: -1
61 | 
62 | 
63 | OUTPUT_DIR: ./exp_retina_ms_coco_unlabeled/  


--------------------------------------------------------------------------------
/semi_test/retinanet_R-50-FPN_1x_semi.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train","coco_2014_valminusminival","coco_2014_valminusminival")
35 |   TEST: ("coco_2014_valminusminival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (800,)
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 |   NUM_WORKERS: 4
44 | SOLVER:
45 |   # Assume 4 gpus
46 |   BASE_LR: 0.005
47 |   WEIGHT_DECAY: 0.0001
48 |   STEPS: (120000, 160000)
49 |   MAX_ITER: 280000
50 |   IMS_PER_BATCH: 8
51 |   
52 | SEMI:
53 |   EMA_DECAY: 0.99
54 |   TEMPORAL_NUM: 10
55 |   TEMPORAL_SAVE_PATH: ./tempor_pred_coco_bn8/
56 |   FG_IOU_THRESHOLD: 0.6
57 |   BG_IOU_THRESHOLD: 0.1
58 |   SEMI_WEIGHT: 10.
59 |   SEMI_STEP: 120000
60 | 
61 | 
62 | OUTPUT_DIR: ./exp_retina_ms_coco_train_bn8_semi/  


--------------------------------------------------------------------------------
/semi_test/retinanet_R-50-FPN_1x_semi_resize.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train","coco_2014_valminusminival",)
35 |   TEST: ("coco_2014_train",) #"coco_2014_minival",
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (800,810,820,830,850,)
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 |   NUM_WORKERS: 4
44 | SOLVER:
45 |   # Assume 4 gpus
46 |   BASE_LR: 0.002
47 |   WEIGHT_DECAY: 0.0001
48 |   STEPS: (40000, 90000)
49 |   MAX_ITER: 980000
50 |   IMS_PER_BATCH: 8
51 |   
52 | SEMI:
53 |   EMA_DECAY: 0.99
54 |   TEMPORAL_NUM: 5
55 |   TEMPORAL_SAVE_PATH: /tempor_pred_coco_bn8_resize/
56 |   FG_IOU_THRESHOLD: 0.6
57 |   BG_IOU_THRESHOLD: 0.1
58 |   SEMI_WEIGHT: 10.
59 |   SEMI_STEP: -1
60 |   HYPER_PARAMETERS:
61 |     ENS_THRE: 0.15
62 |     EPOCH_BATCH_NUM: 27210
63 |     START_ITER: 3
64 |     RAMPUP_LENGTH: 13
65 |     MAX_CONSISTENT_LOSS: 0.08
66 |     CONSISTENCE_WEIGHT: 0.5
67 |     LABELED_DATABASE: [0,]
68 | 
69 | OUTPUT_DIR: ./exp_retina_ms_coco_train_bn8_resize/  


--------------------------------------------------------------------------------
/semi_test/retinanet_R-50-FPN_1x_semi_step2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train","coco_2014_valminusminival","coco_2014_valminusminival")
35 |   TEST: ("coco_2014_valminusminival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (800,)
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 |   NUM_WORKERS: 4
44 | SOLVER:
45 |   # Assume 4 gpus
46 |   BASE_LR: 0.002
47 |   WEIGHT_DECAY: 0.0001
48 |   STEPS: (40000, 90000)
49 |   MAX_ITER: 980000
50 |   IMS_PER_BATCH: 8
51 |   
52 | SEMI:
53 |   EMA_DECAY: 0.99
54 |   TEMPORAL_NUM: 10
55 |   TEMPORAL_SAVE_PATH: ./tempor_pred_coco_bn8/
56 |   FG_IOU_THRESHOLD: 0.6
57 |   BG_IOU_THRESHOLD: 0.1
58 |   SEMI_WEIGHT: 10.
59 |   SEMI_STEP: -1
60 | 
61 | 
62 | OUTPUT_DIR: ./exp_retina_ms_coco_train_bn8_semi_v2/  


--------------------------------------------------------------------------------
/semi_test/retinanet_R-50-FPN_val.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_valminusminival",)
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (800,)
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assume 4 gpus
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (80000, 120000)
48 |   MAX_ITER: 140000
49 |   IMS_PER_BATCH: 8
50 | 
51 | 
52 | OUTPUT_DIR: ./exp_R-50-FPN_val/  


--------------------------------------------------------------------------------
/semi_test/retinanet_R-50-FPN_val_semi.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train","coco_2014_valminusminival",)
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (800,810,820,830,850)
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assume 4 gpus
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (80000, 120000)
48 |   MAX_ITER: 1400000
49 |   IMS_PER_BATCH: 8
50 | 
51 | SEMI:
52 |   EMA_DECAY: 0.99
53 |   TEMPORAL_NUM: 5
54 |   TEMPORAL_SAVE_PATH: ./tempor_coco_R50_val/
55 |   FG_IOU_THRESHOLD: 0.6
56 |   BG_IOU_THRESHOLD: 0.1
57 |   SEMI_WEIGHT: 10.
58 |   SEMI_STEP: -1
59 |   HYPER_PARAMETERS:
60 |     ENS_THRE: 0.15
61 |     EPOCH_BATCH_NUM: 14786
62 |     START_ITER: 3
63 |     RAMPUP_LENGTH: 13
64 |     MAX_CONSISTENT_LOSS: 0.1
65 |     CONSISTENCE_WEIGHT: 0.1
66 |     LABELED_DATABASE: [1,]
67 |     REG_CONSIST_WEIGHT: 25
68 |     NCLS: 80
69 | 
70 | 
71 | OUTPUT_DIR: ./exp_R-50-FPN_val_semi/  


--------------------------------------------------------------------------------
/semi_test/retinanet_X_101_32x8d_FPN_1x_unlableled.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-101-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |     STRIDE_IN_1X1: False
11 |     NUM_GROUPS: 32
12 |     WIDTH_PER_GROUP: 8
13 |   RPN:
14 |     USE_FPN: True
15 |     FG_IOU_THRESHOLD: 0.5
16 |     BG_IOU_THRESHOLD: 0.4
17 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
18 |     PRE_NMS_TOP_N_TRAIN: 2000
19 |     PRE_NMS_TOP_N_TEST: 1000
20 |     POST_NMS_TOP_N_TEST: 1000
21 |     FPN_POST_NMS_TOP_N_TEST: 1000
22 |   ROI_HEADS:
23 |     USE_FPN: True
24 |     BATCH_SIZE_PER_IMAGE: 256
25 |   ROI_BOX_HEAD:
26 |     POOLER_RESOLUTION: 7
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     POOLER_SAMPLING_RATIO: 2
29 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
30 |     PREDICTOR: "FPNPredictor"
31 |   RETINANET:
32 |     SCALES_PER_OCTAVE: 3
33 |     STRADDLE_THRESH: -1
34 |     FG_IOU_THRESHOLD: 0.5
35 |     BG_IOU_THRESHOLD: 0.4
36 | DATASETS:
37 |   TRAIN: ("coco_2014_unlabeled","coco_2014_unlabeled","coco_2014_unlabeled",)
38 |   TEST: ("coco_2014_minival",)
39 | INPUT:
40 |   MIN_SIZE_TRAIN: (800, )
41 |   MAX_SIZE_TRAIN: 1333
42 |   MIN_SIZE_TEST: 800
43 |   MAX_SIZE_TEST: 1333
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 |   # Assume 4 gpus
48 |   BASE_LR: 0.0025
49 |   WEIGHT_DECAY: 0.0001
50 |   STEPS: (240000, 320000)
51 |   MAX_ITER: 960000
52 |   IMS_PER_BATCH: 4
53 | 
54 | 
55 |   
56 | SEMI:
57 |   EMA_DECAY: 0.997
58 |   TEMPORAL_NUM: 10
59 |   TEMPORAL_SAVE_PATH: ./tempor_pred_coco_unlabeled_x101/
60 |   FG_IOU_THRESHOLD: 0.6
61 |   BG_IOU_THRESHOLD: 0.1
62 |   SEMI_WEIGHT: 10.
63 |   SEMI_STEP: -1
64 | 
65 | 
66 | OUTPUT_DIR: ./exp_retina_ms_coco_unlabeled_X101/  


--------------------------------------------------------------------------------
/semi_test/tempor_ens_cp.py:
--------------------------------------------------------------------------------
 1 | import glob
 2 | import numpy as np
 3 | from tqdm import tqdm
 4 | import os
 5 | import shutil
 6 | 
 7 | src_folder = './tempor_pred_save/'
 8 | target_folder = './tempor_pred_save_0_17500/'
 9 | iter_start = 17500
10 | iter_stop = 25000
11 | if not os.path.exists(target_folder):
12 |     os.mkdir(target_folder)
13 | 
14 | temporal_ens = glob.glob(src_folder+'*')
15 | 
16 | for _iter in tqdm(temporal_ens):
17 |     pts = glob.glob(os.path.join(_iter,'*.pt'))
18 |     if len(pts)<1:
19 |         continue
20 | 
21 |     img_id = os.path.basename(_iter)
22 |     pts_iter = [int(_iter.split('_x')[-1].replace('.pt','')) for _iter in pts]
23 |     sel_ind = np.logical_and(np.array(pts_iter) > iter_start , np.array(pts_iter) < iter_stop)
24 |     cp_files = np.array(pts)[sel_ind]
25 | 
26 |     target_img_folder = os.path.join(target_folder,img_id)
27 |     
28 |     if not os.path.exists(target_img_folder):
29 |         os.mkdir(target_img_folder)
30 | 
31 |     for _im in cp_files:
32 |         _im_id = os.path.basename(_im)
33 |         _img_target = os.path.join(target_img_folder,_im_id)
34 |         shutil.copy(_im,_img_target)
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/semi_test/test_units.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | # Set up custom environment before nearly anything else is imported
 3 | # NOTE: this should be the first import (do not reorder)
 4 | from maskrcnn_benchmark.utils.env import setup_environment  # noqa F401 isort:skip
 5 | 
 6 | import argparse
 7 | import os
 8 | 
 9 | import torch
10 | from maskrcnn_benchmark.config import cfg
11 | from maskrcnn_benchmark.data import make_data_loader
12 | from semi_test.inference_ens import inference
13 | from maskrcnn_benchmark.modeling.detector import build_detection_model
14 | from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
15 | from maskrcnn_benchmark.utils.collect_env import collect_env_info
16 | from maskrcnn_benchmark.utils.comm import synchronize, get_rank
17 | from maskrcnn_benchmark.utils.logger import setup_logger
18 | from maskrcnn_benchmark.utils.miscellaneous import mkdir
19 | from maskrcnn_benchmark.modeling.rpn.retinanet.inference import make_retinanet_postprocessor
20 | from maskrcnn_benchmark.structures.bounding_box import BoxList
21 | from maskrcnn_benchmark.modeling.box_coder import BoxCoder
22 | from maskrcnn_benchmark.data.datasets import UnlabeledDataset
23 | from semi_test.semi_loss import make_semi_box_loss_evaluator
24 | # apex.amp is mandatory here: a missing apex aborts the import with a hint.
25 | try:
26 |     from apex import amp
27 | except ImportError:
28 |     raise ImportError('Use APEX for mixed precision via apex.amp')
29 | import multiprocessing
30 | multiprocessing.set_start_method('spawn', True)  # second argument is force=True
31 | 
32 | # Smoke check: only constructs an UnlabeledDataset from hard-coded paths.
33 | if __name__ == "__main__":
34 |     ds = UnlabeledDataset('/MS_COCO/annotations/image_info_unlabeled2017.json','datasets/coco/unlabeled2017','/tempor_pred_coco_unlabeled/')


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #!/usr/bin/env python
 3 | 
 4 | import glob
 5 | import os
 6 | 
 7 | import torch
 8 | from setuptools import find_packages
 9 | from setuptools import setup
10 | from torch.utils.cpp_extension import CUDA_HOME
11 | from torch.utils.cpp_extension import CppExtension
12 | from torch.utils.cpp_extension import CUDAExtension
13 | 
14 | requirements = ["torch", "torchvision"]
15 | 
16 | 
17 | def get_extensions():
18 |     this_dir = os.path.dirname(os.path.abspath(__file__))
19 |     extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc")
20 | 
21 |     main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
22 |     source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
23 |     source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
24 | 
25 |     sources = main_file + source_cpu
26 |     extension = CppExtension
27 | 
28 |     extra_compile_args = {"cxx": []}
29 |     define_macros = []
30 | 
31 |     if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1":
32 |         extension = CUDAExtension
33 |         sources += source_cuda
34 |         define_macros += [("WITH_CUDA", None)]
35 |         extra_compile_args["nvcc"] = [
36 |             "-DCUDA_HAS_FP16=1",
37 |             "-D__CUDA_NO_HALF_OPERATORS__",
38 |             "-D__CUDA_NO_HALF_CONVERSIONS__",
39 |             "-D__CUDA_NO_HALF2_OPERATORS__",
40 |         ]
41 | 
42 |     sources = [os.path.join(extensions_dir, s) for s in sources]
43 | 
44 |     include_dirs = [extensions_dir]
45 | 
46 |     ext_modules = [
47 |         extension(
48 |             "maskrcnn_benchmark._C",
49 |             sources,
50 |             include_dirs=include_dirs,
51 |             define_macros=define_macros,
52 |             extra_compile_args=extra_compile_args,
53 |         )
54 |     ]
55 | 
56 |     return ext_modules
57 | 
58 | 
59 | setup(
60 |     name="maskrcnn_benchmark",
61 |     version="0.1",
62 |     author="fmassa",
63 |     url="https://github.com/facebookresearch/maskrcnn-benchmark",
64 |     description="object detection in pytorch",
65 |     packages=find_packages(exclude=("configs", "tests",)),  # configs/tests are not packaged
66 |     # install_requires=requirements,  (NOTE(review): disabled, so torch/torchvision are not enforced - confirm)
67 |     ext_modules=get_extensions(),  # evaluated when setup.py runs: sources are globbed at that point
68 |     cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},  # torch's build_ext command class
69 | )
70 | 


--------------------------------------------------------------------------------
/tests/env_tests/env.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import os
 4 | 
 5 | 
 6 | def get_config_root_path():
 7 |     ''' Path to configs for unit tests '''
 8 |     # cur_file_dir is root/tests/env_tests
 9 |     cur_file_dir = os.path.dirname(os.path.abspath(os.path.realpath(__file__)))
10 |     ret = os.path.dirname(os.path.dirname(cur_file_dir))
11 |     ret = os.path.join(ret, "configs")
12 |     return ret
13 | 


--------------------------------------------------------------------------------
/tests/test_backbones.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import unittest
 4 | import copy
 5 | import torch
 6 | # import modules to register backbones
 7 | from maskrcnn_benchmark.modeling.backbone import build_backbone # NoQA
 8 | from maskrcnn_benchmark.modeling import registry
 9 | from maskrcnn_benchmark.config import cfg as g_cfg
10 | from utils import load_config
11 | 
12 | 
13 | # Backbone name -> config file (relative to the config root); unlisted names use the default config
14 | BACKBONE_CFGS = {
15 |     "R-50-FPN": "e2e_faster_rcnn_R_50_FPN_1x.yaml",
16 |     "R-101-FPN": "e2e_faster_rcnn_R_101_FPN_1x.yaml",
17 |     "R-152-FPN": "e2e_faster_rcnn_R_101_FPN_1x.yaml",  # NOTE(review): R-152 reuses the R-101 config - confirm
18 |     "R-50-FPN-RETINANET": "retinanet/retinanet_R-50-FPN_1x.yaml",
19 |     "R-101-FPN-RETINANET": "retinanet/retinanet_R-101-FPN_1x.yaml",
20 | }
21 | 
22 | 
23 | class TestBackbones(unittest.TestCase):
24 |     def test_build_backbones(self):
25 |         ''' Build every registered backbone and run it once on random input '''
26 |         # An empty registry would make the loop below pass vacuously.
27 |         self.assertGreater(len(registry.BACKBONES), 0)
28 | 
29 |         for name, backbone_builder in registry.BACKBONES.items():
30 |             print('Testing {}...'.format(name))
31 |             if name in BACKBONE_CFGS:
32 |                 cfg = load_config(BACKBONE_CFGS[name])
33 |             else:
34 |                 # Use default config if config file is not specified
35 |                 cfg = copy.deepcopy(g_cfg)
36 |             backbone = backbone_builder(cfg)
37 | 
38 |             # make sure the backbone has `out_channels`
39 |             self.assertIsNotNone(
40 |                 getattr(backbone, 'out_channels', None),
41 |                 'Need to provide out_channels for backbone {}'.format(name)
42 |             )
43 |             # Each output's first two dimensions must be (N, backbone.out_channels).
44 |             N, C_in, H, W = 2, 3, 224, 256
45 |             input = torch.rand([N, C_in, H, W], dtype=torch.float32)
46 |             out = backbone(input)
47 |             for cur_out in out:
48 |                 self.assertEqual(
49 |                     cur_out.shape[:2],
50 |                     torch.Size([N, backbone.out_channels])
51 |                 )
52 | 
53 | 
54 | if __name__ == "__main__":
55 |     unittest.main()
56 | 


--------------------------------------------------------------------------------
/tests/test_configs.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import unittest
 4 | import glob
 5 | import os
 6 | import utils
 7 | 
 8 | 
 9 | class TestConfigs(unittest.TestCase):
10 |     def test_configs_load(self):
11 |         ''' Make sure configs are loadable '''
12 | 
13 |         cfg_root_path = utils.get_config_root_path()
14 |         files = glob.glob(
15 |             os.path.join(cfg_root_path, "./**/*.yaml"), recursive=True)
16 |         self.assertGreater(len(files), 0)
17 | 
18 |         for fn in files:
19 |             print('Loading {}...'.format(fn))
20 |             utils.load_config_from_file(fn)
21 | 
22 | 
23 | if __name__ == "__main__":
24 |     unittest.main()
25 | 


--------------------------------------------------------------------------------
/tests/test_metric_logger.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import unittest
 3 | 
 4 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger
 5 | 
 6 | 
 7 | class TestMetricLogger(unittest.TestCase):
 8 |     def test_update(self):
 9 |         meter = MetricLogger()
10 |         for i in range(10):
11 |             meter.update(metric=float(i))
12 |         
13 |         m = meter.meters["metric"]
14 |         self.assertEqual(m.count, 10)
15 |         self.assertEqual(m.total, 45)
16 |         self.assertEqual(m.median, 4)
17 |         self.assertEqual(m.avg, 4.5)
18 | 
19 |     def test_no_attr(self):
20 |         meter = MetricLogger()
21 |         _ = meter.meters
22 |         _ = meter.delimiter
23 |         def broken():
24 |             _ = meter.not_existent
25 |         self.assertRaises(AttributeError, broken)
26 | 
27 | if __name__ == "__main__":
28 |     unittest.main()
29 | 


--------------------------------------------------------------------------------
/tests/test_rpn_heads.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import unittest
 4 | import copy
 5 | import torch
 6 | # import modules to register rpn heads
 7 | from maskrcnn_benchmark.modeling.backbone import build_backbone # NoQA
 8 | from maskrcnn_benchmark.modeling.rpn.rpn import build_rpn # NoQA
 9 | from maskrcnn_benchmark.modeling import registry
10 | from maskrcnn_benchmark.config import cfg as g_cfg
11 | from utils import load_config
12 | 
13 | 
14 | # overwrite configs if specified, otherwise default config is used
15 | RPN_CFGS = {
16 | }
17 | 
18 | 
19 | class TestRPNHeads(unittest.TestCase):
20 |     def test_build_rpn_heads(self):
21 |         ''' Build every registered RPN head and check the shapes it returns '''
22 |         # An empty registry would make the loop below pass vacuously.
23 |         self.assertGreater(len(registry.RPN_HEADS), 0)
24 | 
25 |         in_channels = 64
26 |         num_anchors = 10
27 | 
28 |         for name, builder in registry.RPN_HEADS.items():
29 |             print('Testing {}...'.format(name))
30 |             if name in RPN_CFGS:
31 |                 cfg = load_config(RPN_CFGS[name])
32 |             else:
33 |                 # Use default config if config file is not specified
34 |                 cfg = copy.deepcopy(g_cfg)
35 | 
36 |             rpn = builder(cfg, in_channels, num_anchors)
37 |             # The same random tensor is passed LAYERS times as the list of inputs.
38 |             N, C_in, H, W = 2, in_channels, 24, 32
39 |             input = torch.rand([N, C_in, H, W], dtype=torch.float32)
40 |             LAYERS = 3
41 |             out = rpn([input] * LAYERS)
42 |             self.assertEqual(len(out), 2)  # unpacked below as (logits, bbox_reg)
43 |             logits, bbox_reg = out
44 |             for idx in range(LAYERS):
45 |                 self.assertEqual(  # logits[idx]: dim 1 is num_anchors, dims 0/2/3 match input
46 |                     logits[idx].shape,
47 |                     torch.Size([
48 |                         input.shape[0], num_anchors,
49 |                         input.shape[2], input.shape[3],
50 |                     ])
51 |                 )
52 |                 self.assertEqual(  # bbox_reg[idx]: dim 1 is num_anchors * 4, other dims match logits[idx]
53 |                     bbox_reg[idx].shape,
54 |                     torch.Size([
55 |                         logits[idx].shape[0], num_anchors * 4,
56 |                         logits[idx].shape[2], logits[idx].shape[3],
57 |                     ]),
58 |                 )
59 | 
60 | 
61 | if __name__ == "__main__":
62 |     unittest.main()
63 | 


--------------------------------------------------------------------------------
/tests/test_segmentation_mask.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import unittest
 3 | import torch
 4 | from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask
 5 | 
 6 | 
 7 | class TestSegmentationMask(unittest.TestCase):
 8 |     def __init__(self, method_name='runTest'):
 9 |         super(TestSegmentationMask, self).__init__(method_name)
10 |         poly = [[[423.0, 306.5, 406.5, 277.0, 400.0, 271.5, 389.5, 277.0,
11 |                   387.5, 292.0, 384.5, 295.0, 374.5, 220.0, 378.5, 210.0,
12 |                   391.0, 200.5, 404.0, 199.5, 414.0, 203.5, 425.5, 221.0,
13 |                   438.5, 297.0, 423.0, 306.5],
14 |                  [100, 100,     200, 100,     200, 200,     100, 200],
15 |                 ]]
16 |         width = 640
17 |         height = 480
18 |         size = width, height
19 |         # P keeps the data in 'poly' mode; M is the same data converted to 'mask' mode.
20 |         self.P = SegmentationMask(poly, size, 'poly')
21 |         self.M = SegmentationMask(poly, size, 'poly').convert('mask')
22 | 
23 |     def L1(self, A, B):  # sum of absolute differences between the two mask tensors
24 |         diff = A.get_mask_tensor() - B.get_mask_tensor()
25 |         diff = torch.sum(torch.abs(diff.float())).item()
26 |         return diff
27 | 
28 |     def test_convert(self):
29 |         M_hat = self.M.convert('poly').convert('mask')  # mask -> poly -> mask round trip
30 |         P_hat = self.P.convert('mask').convert('poly')  # poly -> mask -> poly round trip
31 |         # Both round trips must differ from their source by the same, bounded amount.
32 |         diff_mask = self.L1(self.M, M_hat)
33 |         diff_poly = self.L1(self.P, P_hat)
34 |         self.assertTrue(diff_mask == diff_poly)
35 |         self.assertTrue(diff_mask <= 8169.)  # NOTE(review): bound looks empirically chosen - confirm
36 |         self.assertTrue(diff_poly <= 8169.)
37 | 
38 |     def test_crop(self):
39 |         box = [400, 250, 500, 300] # xyxy
40 |         diff = self.L1(self.M.crop(box), self.P.crop(box))
41 |         self.assertTrue(diff <= 1.)
42 | 
43 |     def test_resize(self):
44 |         new_size = 50, 25
45 |         M_hat = self.M.resize(new_size)
46 |         P_hat = self.P.resize(new_size)
47 |         diff = self.L1(M_hat, P_hat)
48 |         # Sizes must agree between modes, change after resize, and the masks stay close.
49 |         self.assertTrue(self.M.size == self.P.size)
50 |         self.assertTrue(M_hat.size == P_hat.size)
51 |         self.assertTrue(self.M.size != M_hat.size)
52 |         self.assertTrue(diff <= 255.)
53 | 
54 |     def test_transpose(self):
55 |         FLIP_LEFT_RIGHT = 0  # presumably mirrors the PIL transpose constants - confirm
56 |         FLIP_TOP_BOTTOM = 1
57 |         diff_hor = self.L1(self.M.transpose(FLIP_LEFT_RIGHT),
58 |                            self.P.transpose(FLIP_LEFT_RIGHT))
59 | 
60 |         diff_ver = self.L1(self.M.transpose(FLIP_TOP_BOTTOM),
61 |                            self.P.transpose(FLIP_TOP_BOTTOM))
62 |         # NOTE(review): these large tolerances look empirically chosen - confirm.
63 |         self.assertTrue(diff_hor <= 53250.)
64 |         self.assertTrue(diff_ver <= 42494.)
65 | 
66 | 
67 | if __name__ == "__main__":
68 | 
69 |     unittest.main()
70 | 


--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function, unicode_literals
 2 | 
 3 | # Set up custom environment before nearly anything else is imported
 4 | # NOTE: this should be the first import (do not reorder)
 5 | from maskrcnn_benchmark.utils.env import setup_environment  # noqa F401 isort:skip
 6 | import env_tests.env as env_tests
 7 | 
 8 | import os
 9 | import copy
10 | 
11 | from maskrcnn_benchmark.config import cfg as g_cfg
12 | 
13 | 
14 | def get_config_root_path():  # thin wrapper: delegates to env_tests.get_config_root_path
15 |     return env_tests.get_config_root_path()
16 | 
17 | 
18 | def load_config(rel_path):
19 |     ''' Load config from file path specified as path relative to config_root '''
20 |     cfg_path = os.path.join(env_tests.get_config_root_path(), rel_path)
21 |     return load_config_from_file(cfg_path)
22 | 
23 | 
24 | def load_config_from_file(file_path):
25 |     ''' Load config from file path specified as absolute path '''
26 |     ret = copy.deepcopy(g_cfg)
27 |     ret.merge_from_file(file_path)
28 |     return ret
29 | 


--------------------------------------------------------------------------------
/tools/datasets/voc/VOC2007:
--------------------------------------------------------------------------------
1 | /home/chencong/DiscE/web_db/voc/VOCdevkit/VOC2007/


--------------------------------------------------------------------------------
/tools/datasets/voc/VOC2012:
--------------------------------------------------------------------------------
1 | /home/chencong/DiscE/web_db/voc/VOCdevkit/VOC2012


--------------------------------------------------------------------------------