├── .gitignore ├── INSTALL.md ├── LICENSE ├── README.md ├── configs ├── caffe2 │ ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml │ ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml │ ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml │ ├── e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml │ ├── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml │ └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml ├── cityscapes │ ├── README.md │ ├── e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml ├── dcn │ ├── README.md │ ├── e2e_faster_rcnn_dconv_R_50_FPN_1x.yaml │ ├── e2e_faster_rcnn_mdconv_R_50_FPN_1x.yaml │ ├── e2e_mask_rcnn_dconv_R_50_FPN_1x.yaml │ └── e2e_mask_rcnn_mdconv_R_50_FPN_1x.yaml ├── e2e_faster_rcnn_R_101_FPN_1x.yaml ├── e2e_faster_rcnn_R_50_C4_1x.yaml ├── e2e_faster_rcnn_R_50_FPN_1x.yaml ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml ├── e2e_faster_rcnn_fbnet.yaml ├── e2e_faster_rcnn_fbnet_600.yaml ├── e2e_faster_rcnn_fbnet_chamv1a_600.yaml ├── e2e_keypoint_rcnn_R_50_FPN_1x.yaml ├── e2e_mask_rcnn_R_101_FPN_1x.yaml ├── e2e_mask_rcnn_R_50_C4_1x.yaml ├── e2e_mask_rcnn_R_50_FPN_1x.yaml ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── e2e_mask_rcnn_fbnet.yaml ├── e2e_mask_rcnn_fbnet_600.yaml ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml ├── fewshot │ ├── base │ │ ├── e2e_coco_base.yaml │ │ ├── e2e_voc_split1_base.yaml │ │ ├── e2e_voc_split2_base.yaml │ │ └── e2e_voc_split3_base.yaml │ └── standard │ │ ├── e2e_coco_10shot_finetune.yaml │ │ ├── e2e_coco_30shot_finetune.yaml │ │ ├── e2e_voc_split1_10shot_finetune.yaml │ │ ├── e2e_voc_split1_1shot_finetune.yaml │ │ ├── e2e_voc_split1_2shot_finetune.yaml │ │ ├── e2e_voc_split1_3shot_finetune.yaml │ │ ├── e2e_voc_split1_5shot_finetune.yaml │ │ ├── e2e_voc_split2_10shot_finetune.yaml │ │ ├── e2e_voc_split2_1shot_finetune.yaml │ │ ├── e2e_voc_split2_2shot_finetune.yaml │ │ ├── e2e_voc_split2_3shot_finetune.yaml │ │ ├── e2e_voc_split2_5shot_finetune.yaml │ │ ├── e2e_voc_split3_10shot_finetune.yaml │ │ ├── e2e_voc_split3_1shot_finetune.yaml │ │ ├── e2e_voc_split3_2shot_finetune.yaml │ │ ├── e2e_voc_split3_3shot_finetune.yaml │ │ └── e2e_voc_split3_5shot_finetune.yaml ├── fewshot_baseline │ ├── base │ │ ├── e2e_coco_base.yaml │ │ ├── e2e_voc_split1_base.yaml │ │ ├── e2e_voc_split2_base.yaml │ │ └── e2e_voc_split3_base.yaml │ └── standard │ │ ├── e2e_coco_10shot_finetune.yaml │ │ ├── e2e_coco_30shot_finetune.yaml │ │ ├── e2e_voc_split1_10shot_finetune.yaml │ │ ├── e2e_voc_split1_1shot_finetune.yaml │ │ ├── e2e_voc_split1_2shot_finetune.yaml │ │ ├── e2e_voc_split1_3shot_finetune.yaml │ │ ├── e2e_voc_split1_5shot_finetune.yaml │ │ ├── e2e_voc_split2_10shot_finetune.yaml │ │ ├── e2e_voc_split2_1shot_finetune.yaml │ │ ├── e2e_voc_split2_2shot_finetune.yaml │ │ ├── e2e_voc_split2_3shot_finetune.yaml │ │ ├── e2e_voc_split2_5shot_finetune.yaml │ │ ├── e2e_voc_split3_10shot_finetune.yaml │ │ ├── e2e_voc_split3_1shot_finetune.yaml │ │ ├── e2e_voc_split3_2shot_finetune.yaml │ │ ├── e2e_voc_split3_3shot_finetune.yaml │ │ └── e2e_voc_split3_5shot_finetune.yaml ├── gn_baselines │ ├── README.md │ ├── e2e_faster_rcnn_R_50_FPN_1x_gn.yaml │ ├── e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x_gn.yaml │ ├── e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ ├── scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml │ ├── 
scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml │ ├── scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml │ └── scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml ├── pascal_voc │ ├── e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml │ ├── e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml ├── quick_schedules │ ├── e2e_faster_rcnn_R_50_C4_quick.yaml │ ├── e2e_faster_rcnn_R_50_FPN_quick.yaml │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml │ ├── e2e_keypoint_rcnn_R_50_FPN_quick.yaml │ ├── e2e_mask_rcnn_R_50_C4_quick.yaml │ ├── e2e_mask_rcnn_R_50_FPN_quick.yaml │ ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml │ ├── rpn_R_50_C4_quick.yaml │ └── rpn_R_50_FPN_quick.yaml ├── retinanet │ ├── retinanet_R-101-FPN_1x.yaml │ ├── retinanet_R-101-FPN_P5_1x.yaml │ ├── retinanet_R-50-FPN_1x.yaml │ ├── retinanet_R-50-FPN_1x_quick.yaml │ ├── retinanet_R-50-FPN_P5_1x.yaml │ └── retinanet_X_101_32x8d_FPN_1x.yaml ├── rpn_R_101_FPN_1x.yaml ├── rpn_R_50_C4_1x.yaml ├── rpn_R_50_FPN_1x.yaml ├── rpn_X_101_32x8d_FPN_1x.yaml └── test_time_aug │ └── e2e_mask_rcnn_R_50_FPN_1x.yaml ├── demo ├── README.md ├── demo_e2e_mask_rcnn_R_50_FPN_1x.png ├── demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png ├── predictor.py └── webcam.py ├── maskrcnn_benchmark ├── __init__.py ├── config │ ├── __init__.py │ ├── defaults.py │ └── paths_catalog.py ├── csrc │ ├── ROIAlign.h │ ├── ROIPool.h │ ├── SigmoidFocalLoss.h │ ├── cpu │ │ ├── ROIAlign_cpu.cpp │ │ ├── nms_cpu.cpp │ │ └── vision.h │ ├── cuda │ │ ├── ROIAlign_cuda.cu │ │ ├── ROIPool_cuda.cu │ │ ├── SigmoidFocalLoss_cuda.cu │ │ ├── deform_conv_cuda.cu │ │ ├── deform_conv_kernel_cuda.cu │ │ ├── deform_pool_cuda.cu │ │ ├── deform_pool_kernel_cuda.cu │ │ ├── nms.cu │ │ └── vision.h │ ├── deform_conv.h │ ├── deform_pool.h │ ├── nms.h │ └── vision.cpp ├── data │ ├── README.md │ ├── __init__.py │ ├── build.py │ ├── collate_batch.py │ ├── datasets │ │ ├── __init__.py │ │ ├── abstract.py │ │ ├── cityscapes.py │ │ ├── closeup.py │ │ ├── coco.py │ │ ├── concat_dataset.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── cityscapes │ │ │ │ ├── __init__.py │ │ │ │ ├── cityscapes_eval.py │ │ │ │ └── eval_instances.py │ │ │ ├── coco │ │ │ │ ├── __init__.py │ │ │ │ ├── abs_to_coco.py │ │ │ │ ├── coco_eval.py │ │ │ │ └── coco_eval_wrapper.py │ │ │ └── voc │ │ │ │ ├── __init__.py │ │ │ │ └── voc_eval.py │ │ ├── list_dataset.py │ │ └── voc.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── grouped_batch_sampler.py │ │ └── iteration_based_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── build.py │ │ └── transforms.py ├── engine │ ├── __init__.py │ ├── bbox_aug.py │ ├── demo.py │ ├── inference.py │ └── trainer.py ├── layers │ ├── __init__.py │ ├── _utils.py │ ├── batch_norm.py │ ├── dcn │ │ ├── __init__.py │ │ ├── deform_conv_func.py │ │ ├── deform_conv_module.py │ │ ├── deform_pool_func.py │ │ └── deform_pool_module.py │ ├── misc.py │ ├── nms.py │ ├── roi_align.py │ ├── roi_pool.py │ ├── sigmoid_focal_loss.py │ └── smooth_l1_loss.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── fbnet.py │ │ ├── fbnet_builder.py │ │ ├── fbnet_modeldef.py │ │ ├── fpn.py │ │ └── resnet.py │ ├── balanced_positive_negative_sampler.py │ ├── box_coder.py │ ├── detector │ │ ├── __init__.py │ │ ├── detectors.py │ │ └── generalized_rcnn.py │ ├── make_layers.py │ ├── matcher.py │ ├── poolers.py │ ├── registry.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── box_head │ │ │ ├── __init__.py │ │ │ ├── box_head.py │ │ │ ├── inference.py │ │ │ 
├── loss.py │ │ │ ├── roi_box_feature_extractors.py │ │ │ └── roi_box_predictors.py │ │ ├── keypoint_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── keypoint_head.py │ │ │ ├── loss.py │ │ │ ├── roi_keypoint_feature_extractors.py │ │ │ └── roi_keypoint_predictors.py │ │ ├── mask_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── mask_head.py │ │ │ ├── roi_mask_feature_extractors.py │ │ │ └── roi_mask_predictors.py │ │ └── roi_heads.py │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── inference.py │ │ ├── loss.py │ │ ├── retinanet │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ └── retinanet.py │ │ ├── rpn.py │ │ └── utils.py │ └── utils.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── bounding_box.py │ ├── boxlist_ops.py │ ├── image_list.py │ ├── keypoint.py │ └── segmentation_mask.py └── utils │ ├── README.md │ ├── __init__.py │ ├── c2_model_loading.py │ ├── checkpoint.py │ ├── collect_env.py │ ├── comm.py │ ├── cv2_util.py │ ├── env.py │ ├── imports.py │ ├── logger.py │ ├── metric_logger.py │ ├── miscellaneous.py │ ├── model_serialization.py │ ├── model_zoo.py │ ├── registry.py │ └── timer.py ├── setup.py ├── tests ├── checkpoint.py ├── env_tests │ └── env.py ├── test_backbones.py ├── test_box_coder.py ├── test_configs.py ├── test_data_samplers.py ├── test_detectors.py ├── test_fbnet.py ├── test_feature_extractors.py ├── test_metric_logger.py ├── test_nms.py ├── test_predictors.py ├── test_rpn_heads.py ├── test_segmentation_mask.py └── utils.py └── tools ├── cityscapes ├── convert_cityscapes_to_coco.py └── instances2dict_with_polygons.py ├── demo.py ├── fewshot_exp ├── MPSR_arch.jpg ├── cal_novel_coco.py ├── cal_novel_voc.py ├── cocoeval.py ├── crops │ ├── create_crops_coco_base.py │ ├── create_crops_coco_standard.py │ ├── create_crops_voc_base.py │ └── create_crops_voc_standard.py ├── datasets │ ├── coco_create_base.py │ ├── coco_create_standard.py │ ├── init_fs_dataset_standard.sh │ ├── voc_create_base.py │ ├── voc_create_standard.py │ └── voc_sample_series.py ├── train_baseline_coco_base.sh ├── train_baseline_coco_standard.sh ├── train_baseline_voc_base.sh ├── train_baseline_voc_standard.sh ├── train_coco_base.sh ├── train_coco_standard.sh ├── train_voc_base.sh ├── train_voc_series.sh ├── train_voc_standard.sh ├── trans_baseline_coco_pretrained.py ├── trans_baseline_voc_pretrained.py ├── trans_coco_pretrained.py └── trans_voc_pretrained.py ├── test_net.py └── train_net.py /.gitignore: -------------------------------------------------------------------------------- 1 | # compilation and distribution 2 | __pycache__ 3 | _ext 4 | *.pyc 5 | *.so 6 | maskrcnn_benchmark.egg-info/ 7 | build/ 8 | dist/ 9 | 10 | # pytorch/python/numpy formats 11 | *.pth 12 | *.pkl 13 | *.npy 14 | 15 | # ipython/jupyter notebooks 16 | *.ipynb 17 | **/.ipynb_checkpoints/ 18 | 19 | # Editor temporaries 20 | *.swn 21 | *.swo 22 | *.swp 23 | *~ 24 | 25 | # Pycharm editor settings 26 | .idea 27 | 28 | # vscode editor settings 29 | .vscode 30 | 31 | # MacOS 32 | .DS_Store 33 | 34 | # project dirs 35 | /datasets 36 | /models 37 | /output 38 | /fs_exp 39 | /inference 40 | 41 | #extra log 42 | /log.txt 43 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements: 4 | - PyTorch 1.1.0 5 | - torchvision 6 | - cocoapi 7 | - yacs 8 
| - matplotlib
9 | - GCC >= 4.9
10 | - OpenCV
11 | - CUDA >= 9.0
12 | 
13 | 
14 | ### Step-by-step installation
15 | 
16 | ```bash
17 | # first, make sure that your conda is set up properly with the right environment
18 | # for that, check that `which conda`, `which pip` and `which python` point to the
19 | # right path. From a clean conda env, this is what you need to do
20 | 
21 | conda create --name maskrcnn_benchmark -y
22 | conda activate maskrcnn_benchmark
23 | 
24 | # this installs the right pip and dependencies for the fresh python
25 | conda install ipython pip
26 | 
27 | # maskrcnn_benchmark and coco api dependencies
28 | pip install ninja yacs cython matplotlib tqdm opencv-python
29 | 
30 | # follow the PyTorch installation instructions at https://pytorch.org/get-started/locally/
31 | # we give the instructions for CUDA 9.0
32 | #conda install -c pytorch pytorch-nightly torchvision cudatoolkit
33 | pip install torch==1.1.0
34 | pip install torchvision==0.3.0
35 | 
36 | export INSTALL_DIR=$PWD
37 | 
38 | git clone https://github.com/jiaxi-wu/MPSR.git
39 | 
40 | # install pycocotools
41 | # modify "cocoeval.py" to store raw results in "~/coco_result.txt"
42 | cd $INSTALL_DIR
43 | git clone https://github.com/cocodataset/cocoapi.git
44 | cp MPSR/tools/fewshot_exp/cocoeval.py cocoapi/PythonAPI/pycocotools/
45 | cd cocoapi/PythonAPI
46 | python setup.py build_ext install
47 | 
48 | # install apex at commit 96b017a8b40f137abb971c4555d61b2fcbb87648
49 | cd $INSTALL_DIR
50 | git clone https://github.com/NVIDIA/apex.git
51 | cd apex
52 | git checkout 96b017a
53 | python setup.py install --cuda_ext --cpp_ext
54 | 
55 | # install PyTorch Detection
56 | cd $INSTALL_DIR
57 | cd MPSR
58 | # the following will install the lib with
59 | # symbolic links, so that you can modify
60 | # the files if you want and won't need to
61 | # re-build it
62 | python setup.py build develop
63 | 
64 | 
65 | unset INSTALL_DIR
66 | ```
67 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2020 Jiaxi Wu
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x" 4 | DATASETS: 5 | TEST: ("coco_2014_minival",) 6 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TEST: ("coco_2014_minival",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/37697547/e2e_keypoint_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 2 24 | ROI_KEYPOINT_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor" 27 | PREDICTOR: "KeypointRCNNPredictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 56 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | KEYPOINT_ON: True 33 | DATASETS: 34 | TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",) 35 | TEST: ("keypoints_coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.02 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (60000, 80000) 44 | MAX_ITER: 90000 45 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x" 4 | ROI_MASK_HEAD: 5 | PREDICTOR: "MaskRCNNC4Predictor" 6 | SHARE_BOX_FEATURE_EXTRACTOR: True 7 | MASK_ON: True 8 | DATASETS: 9 | TEST: ("coco_2014_minival",) 10 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x" 
4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/37129812/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x" 4 | BACKBONE: 5 | CONV_BODY: "R-152-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | TEST: ("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | 
TEST: ("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /configs/cityscapes/e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 9 24 | DATASETS: 25 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 26 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",) 27 | DATALOADER: 28 | SIZE_DIVISIBILITY: 32 29 | SOLVER: 30 | BASE_LR: 0.01 31 | WEIGHT_DECAY: 0.0001 32 | STEPS: (18000,) 33 | MAX_ITER: 24000 34 | -------------------------------------------------------------------------------- /configs/cityscapes/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 9 24 | ROI_MASK_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 27 | PREDICTOR: "MaskRCNNC4Predictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 28 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | MASK_ON: True 33 | DATASETS: 34 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 35 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",) 36 | DATALOADER: 37 | SIZE_DIVISIBILITY: 32 38 | SOLVER: 39 | BASE_LR: 0.01 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (18000,) 42 | MAX_ITER: 24000 43 | -------------------------------------------------------------------------------- /configs/dcn/README.md: -------------------------------------------------------------------------------- 1 | ### Reference 2 | 1 [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/pdf/1811.11168.pdf) 3 | 2 third-party: [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn) 4 | 5 | ### Performance 6 | | case | bbox AP | mask AP | 7 | |----------------------------:|--------:|:-------:| 8 | | R-50-FPN-dcn (implement) | 39.8 | - | 9 | | R-50-FPN-dcn (mmdetection) | 40.0 | - | 10 | | R-50-FPN-mdcn (implement) | 40.0 | - | 11 | | R-50-FPN-mdcn (mmdetection) | 40.3 | - | 12 | | R-50-FPN-dcn (implement) | 40.8 | 36.8 | 13 | | R-50-FPN-dcn (mmdetection) | 41.1 | 37.2 | 14 | | R-50-FPN-dcn (implement) | 40.7 | 36.7 | 15 | 
| R-50-FPN-dcn (mmdetection) | 41.4 | 37.4 | 16 | 17 | 18 | ### Note 19 | see [dcn-v2](https://github.com/open-mmlab/mmdetection/blob/master/MODEL_ZOO.md#deformable-convolution-v2) in `mmdetection` for more details. 20 | 21 | 22 | ### Usage 23 | add these three lines 24 | ``` 25 | MODEL: 26 | RESNETS: 27 | # corresponding to C2,C3,C4,C5 28 | STAGE_WITH_DCN: (False, True, True, True) 29 | WITH_MODULATED_DCN: True 30 | DEFORMABLE_GROUPS: 1 31 | ``` -------------------------------------------------------------------------------- /configs/dcn/e2e_faster_rcnn_dconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: False 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 8 gpus 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | TEST: 44 | IMS_PER_BATCH: 8 45 | -------------------------------------------------------------------------------- /configs/dcn/e2e_faster_rcnn_mdconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: True 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 8 gpus 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | TEST: 44 | IMS_PER_BATCH: 8 45 | -------------------------------------------------------------------------------- /configs/dcn/e2e_mask_rcnn_dconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | 
MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: False 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | ROI_MASK_HEAD: 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 34 | PREDICTOR: "MaskRCNNC4Predictor" 35 | POOLER_RESOLUTION: 14 36 | POOLER_SAMPLING_RATIO: 2 37 | RESOLUTION: 28 38 | SHARE_BOX_FEATURE_EXTRACTOR: False 39 | MASK_ON: True 40 | DATASETS: 41 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 42 | TEST: ("coco_2014_minival",) 43 | DATALOADER: 44 | SIZE_DIVISIBILITY: 32 45 | SOLVER: 46 | # Assume 8 gpus 47 | BASE_LR: 0.02 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (60000, 80000) 50 | MAX_ITER: 90000 51 | IMS_PER_BATCH: 16 52 | TEST: 53 | IMS_PER_BATCH: 8 54 | 55 | -------------------------------------------------------------------------------- /configs/dcn/e2e_mask_rcnn_mdconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: True 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | ROI_MASK_HEAD: 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 34 | PREDICTOR: "MaskRCNNC4Predictor" 35 | POOLER_RESOLUTION: 14 36 | POOLER_SAMPLING_RATIO: 2 37 | RESOLUTION: 28 38 | SHARE_BOX_FEATURE_EXTRACTOR: False 39 | MASK_ON: True 40 | DATASETS: 41 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 42 | TEST: ("coco_2014_minival",) 43 | DATALOADER: 44 | SIZE_DIVISIBILITY: 32 45 | SOLVER: 46 | # Assume 8 gpus 47 | BASE_LR: 0.02 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (60000, 80000) 50 | MAX_ITER: 90000 51 | IMS_PER_BATCH: 16 52 | TEST: 53 | IMS_PER_BATCH: 8 54 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 
64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 25 | TEST: ("coco_2014_minival",) 26 | DATALOADER: 27 | SIZE_DIVISIBILITY: 32 28 | SOLVER: 29 | BASE_LR: 0.02 30 | WEIGHT_DECAY: 0.0001 31 | STEPS: (60000, 80000) 32 | MAX_ITER: 90000 33 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | DATASETS: 8 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 9 | TEST: ("coco_2014_minival",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WEIGHT_DECAY: 0.0001 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | IMS_PER_BATCH: 8 16 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 25 | TEST: ("coco_2014_minival",) 26 | DATALOADER: 27 | SIZE_DIVISIBILITY: 32 28 | SOLVER: 29 | BASE_LR: 0.02 30 | WEIGHT_DECAY: 0.0001 31 | STEPS: (60000, 80000) 32 | MAX_ITER: 90000 33 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RPN: 7 | USE_FPN: True 8 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 9 | PRE_NMS_TOP_N_TRAIN: 2000 10 | PRE_NMS_TOP_N_TEST: 1000 11 | POST_NMS_TOP_N_TEST: 1000 12 | FPN_POST_NMS_TOP_N_TEST: 1000 13 | ROI_HEADS: 14 | USE_FPN: True 15 | ROI_BOX_HEAD: 16 | POOLER_RESOLUTION: 7 17 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 18 | POOLER_SAMPLING_RATIO: 2 19 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 20 | PREDICTOR: "FPNPredictor" 21 | RESNETS: 22 | BACKBONE_OUT_CHANNELS: 256 23 | STRIDE_IN_1X1: False 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.01 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (120000, 160000) 35 | MAX_ITER: 180000 36 | 
IMS_PER_BATCH: 8 37 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_fbnet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 100 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 512 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.06 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (60000, 80000) 34 | MAX_ITER: 90000 35 | IMS_PER_BATCH: 128 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (320, ) 40 | MAX_SIZE_TRAIN: 640 41 | MIN_SIZE_TEST: 320 42 | MAX_SIZE_TEST: 640 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_fbnet_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 200 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.06 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (60000, 80000) 34 | MAX_ITER: 90000 35 | IMS_PER_BATCH: 128 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (600, ) 40 | MAX_SIZE_TRAIN: 1000 41 | MIN_SIZE_TEST: 600 42 | MAX_SIZE_TEST: 1000 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "cham_v1a" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 200 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 128 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 
| DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.045 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (90000, 120000) 34 | MAX_ITER: 135000 35 | IMS_PER_BATCH: 96 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (600, ) 40 | MAX_SIZE_TRAIN: 1000 41 | MIN_SIZE_TEST: 600 42 | MAX_SIZE_TEST: 1000 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /configs/e2e_keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 2 24 | ROI_KEYPOINT_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor" 27 | PREDICTOR: "KeypointRCNNPredictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 56 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | KEYPOINT_ON: True 33 | DATASETS: 34 | TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",) 35 | TEST: ("keypoints_coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.02 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (60000, 80000) 44 | MAX_ITER: 90000 45 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 34 | TEST: ("coco_2014_minival",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_50_C4_1x.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_MASK_HEAD: 8 | PREDICTOR: "MaskRCNNC4Predictor" 9 | SHARE_BOX_FEATURE_EXTRACTOR: True 10 | MASK_ON: True 11 | DATASETS: 12 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 13 | TEST: ("coco_2014_minival",) 14 | SOLVER: 15 | BASE_LR: 0.01 16 | WEIGHT_DECAY: 0.0001 17 | STEPS: (120000, 160000) 18 | MAX_ITER: 180000 19 | IMS_PER_BATCH: 8 20 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 34 | TEST: ("coco_2014_minival",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 37 | TEST: ("coco_2014_minival",) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.01 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (120000, 160000) 44 | MAX_ITER: 180000 45 | IMS_PER_BATCH: 8 46 | 
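These YAML files are consumed by the repository's yacs-based configuration system: `maskrcnn_benchmark/config/defaults.py` defines the default options, and a config file such as the one above overrides them. Below is a minimal sketch of how such a file is typically loaded and overridden from Python, assuming the standard maskrcnn-benchmark `cfg` object; the specific override keys and values shown are illustrative assumptions, not settings taken from this repository.

```python
# Minimal sketch, assuming the standard maskrcnn-benchmark config API and that
# the script runs from the repository root. The override values are examples only.
from maskrcnn_benchmark.config import cfg

# Start from the defaults in maskrcnn_benchmark/config/defaults.py and merge
# one of the YAML files above on top of them.
cfg.merge_from_file("configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml")

# Command-line style overrides, e.g. shrinking the batch size (and, following
# the linear scaling rule, the learning rate) when training on fewer GPUs.
cfg.merge_from_list(["SOLVER.IMS_PER_BATCH", 2, "SOLVER.BASE_LR", 0.0025])
cfg.freeze()

print(cfg.MODEL.META_ARCHITECTURE)  # prints "GeneralizedRCNN"
```

In practice the same key/value overrides are typically passed on the command line to `tools/train_net.py` or `tools/test_net.py` after the `--config-file` argument, rather than edited into the YAML files themselves.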
-------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 100 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (320, ) 48 | MAX_SIZE_TRAIN: 640 49 | MIN_SIZE_TEST: 320 50 | MAX_SIZE_TEST: 640 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 200 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (600, ) 48 | MAX_SIZE_TRAIN: 1000 49 | MIN_SIZE_TEST: 600 50 | MAX_SIZE_TEST: 1000 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "xirb16d_dsmask" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | 
DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: -1.0 12 | RPN: 13 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 100 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 512 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (320, ) 48 | MAX_SIZE_TRAIN: 640 49 | MIN_SIZE_TEST: 320 50 | MAX_SIZE_TEST: 640 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "xirb16d_dsmask" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 200 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (600, ) 48 | MAX_SIZE_TRAIN: 1000 49 | MIN_SIZE_TEST: 600 50 | MAX_SIZE_TEST: 1000 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/fewshot/base/e2e_coco_base.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | NUM_CLASSES: 61 19 | POOLER_RESOLUTION: 8 20 | POOLER_SCALES: (0.25, 
0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | DATASETS: 25 | TRAIN: ("coco_2014_train_base", "coco_2014_valminusminival_base") 26 | CLOSEUP: ("coco_2014_closeup_base",) 27 | TEST: ("coco_2014_minival_base",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | SOLVER: 31 | BASE_LR: 0.005 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (56000, 70000) 34 | MAX_ITER: 80000 35 | IMS_PER_BATCH: 4 36 | TEST: 37 | IMS_PER_BATCH: 4 38 | -------------------------------------------------------------------------------- /configs/fewshot/base/e2e_voc_split1_base.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | NUM_CLASSES: 16 19 | POOLER_RESOLUTION: 8 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | DATASETS: 25 | TRAIN: ("voc_2007_trainval_split1_base", "voc_2012_trainval_split1_base") 26 | CLOSEUP: ("voc_2007_trainval_split1_base_closeup", "voc_2012_trainval_split1_base_closeup") 27 | TEST: ("voc_2007_test_split1_base",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | SOLVER: 31 | BASE_LR: 0.005 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (24000, 32000) 34 | MAX_ITER: 36000 35 | IMS_PER_BATCH: 4 36 | TEST: 37 | IMS_PER_BATCH: 4 38 | -------------------------------------------------------------------------------- /configs/fewshot/base/e2e_voc_split2_base.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | NUM_CLASSES: 16 19 | POOLER_RESOLUTION: 8 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | DATASETS: 25 | TRAIN: ("voc_2007_trainval_split2_base", "voc_2012_trainval_split2_base") 26 | CLOSEUP: ("voc_2007_trainval_split2_base_closeup", "voc_2012_trainval_split2_base_closeup") 27 | TEST: ("voc_2007_test_split2_base",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | SOLVER: 31 | BASE_LR: 0.005 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (24000, 32000) 34 | MAX_ITER: 36000 35 | IMS_PER_BATCH: 4 36 | TEST: 37 | IMS_PER_BATCH: 4 38 | -------------------------------------------------------------------------------- /configs/fewshot/base/e2e_voc_split3_base.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | 
ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | NUM_CLASSES: 16 19 | POOLER_RESOLUTION: 8 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | DATASETS: 25 | TRAIN: ("voc_2007_trainval_split3_base", "voc_2012_trainval_split3_base") 26 | CLOSEUP: ("voc_2007_trainval_split3_base_closeup", "voc_2012_trainval_split3_base_closeup") 27 | TEST: ("voc_2007_test_split3_base",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | SOLVER: 31 | BASE_LR: 0.005 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (24000, 32000) 34 | MAX_ITER: 36000 35 | IMS_PER_BATCH: 4 36 | TEST: 37 | IMS_PER_BATCH: 4 38 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_coco_10shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "coco_base_pretrained.pth" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | NUM_CLASSES: 81 19 | POOLER_RESOLUTION: 8 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | DATASETS: 25 | TRAIN: ("coco_2014_train_10shot_standard", "coco_2014_val_10shot_standard") 26 | CLOSEUP: ("coco_2014_closeup_standard",) 27 | TEST: ("coco_2014_minival",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | SOLVER: 31 | BASE_LR: 0.005 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (2800, 3500) 34 | MAX_ITER: 4000 35 | IMS_PER_BATCH: 4 36 | TEST: 37 | IMS_PER_BATCH: 4 38 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_coco_30shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "coco_base_pretrained.pth" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | NUM_CLASSES: 81 19 | POOLER_RESOLUTION: 8 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | DATASETS: 25 | TRAIN: ("coco_2014_train_30shot_standard", "coco_2014_val_30shot_standard") 26 | CLOSEUP: ("coco_2014_closeup_standard",) 27 | TEST: ("coco_2014_minival",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | SOLVER: 31 | BASE_LR: 0.005 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (5600, 7000) 34 | MAX_ITER: 8000 35 | IMS_PER_BATCH: 4 36 | TEST: 37 | IMS_PER_BATCH: 4 38 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split1_10shot_finetune.yaml: 
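# The *_Nshot_finetune.yaml files under fewshot/standard share one recipe:
# initialize from the matching base-training checkpoint (MODEL.WEIGHT), widen
# the box head to all classes (20 VOC classes + background = 21), and run a
# short 2000-iteration schedule on the N-shot training sets.
# A typical launch, assuming this fork keeps the stock maskrcnn-benchmark
# tools/train_net.py entry point (path and flags are an assumption):
#   python tools/train_net.py \
#     --config-file configs/fewshot/standard/e2e_voc_split1_10shot_finetune.yaml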
-------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split1base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_10shot_standard", "voc_2012_trainval_10shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split1_1shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split1base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_1shot_standard", "voc_2012_trainval_1shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split1_2shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split1base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_2shot_standard", 
"voc_2012_trainval_2shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split1_3shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split1base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_3shot_standard", "voc_2012_trainval_3shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split1_5shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split1base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_5shot_standard", "voc_2012_trainval_5shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split2_10shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split2base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: 
True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_10shot_standard", "voc_2012_trainval_10shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split2_1shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split2base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_1shot_standard", "voc_2012_trainval_1shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split2_2shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split2base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_2shot_standard", "voc_2012_trainval_2shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | 
IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split2_3shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split2base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_3shot_standard", "voc_2012_trainval_3shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split2_5shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split2base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_5shot_standard", "voc_2012_trainval_5shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split3_10shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split3base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | 
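# POOLER_SCALES are the inverse feature strides of the FPN levels the box head
# pools from (1/4, 1/8, 1/16, 1/32 for P2-P5); ROIAlign uses them to map
# image-space proposals onto each feature map.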
POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_10shot_standard", "voc_2012_trainval_10shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split3_1shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split3base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_1shot_standard", "voc_2012_trainval_1shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split3_2shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split3base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_2shot_standard", "voc_2012_trainval_2shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split3_3shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: 
"catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split3base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_3shot_standard", "voc_2012_trainval_3shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot/standard/e2e_voc_split3_5shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_split3base_pretrained.pth" 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 8 20 | NUM_CLASSES: 21 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_5shot_standard", "voc_2012_trainval_5shot_standard") 27 | CLOSEUP: ("voc_2007_trainval_closeup_standard", "voc_2012_trainval_closeup_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/base/e2e_coco_base.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | CLOSEUP_REFINE: False 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | NUM_CLASSES: 61 20 | POOLER_RESOLUTION: 8 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("coco_2014_train_base", "coco_2014_valminusminival_base") 27 | TEST: ("coco_2014_minival_base",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | SOLVER: 31 | BASE_LR: 0.005 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (56000, 70000) 
34 | MAX_ITER: 80000 35 | IMS_PER_BATCH: 4 36 | TEST: 37 | IMS_PER_BATCH: 4 38 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/base/e2e_voc_split1_base.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | CLOSEUP_REFINE: False 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | NUM_CLASSES: 16 20 | POOLER_RESOLUTION: 8 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_split1_base", "voc_2012_trainval_split1_base") 27 | TEST: ("voc_2007_test_split1_base",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | SOLVER: 31 | BASE_LR: 0.005 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (24000, 32000) 34 | MAX_ITER: 36000 35 | IMS_PER_BATCH: 4 36 | TEST: 37 | IMS_PER_BATCH: 4 38 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/base/e2e_voc_split2_base.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | CLOSEUP_REFINE: False 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | NUM_CLASSES: 16 20 | POOLER_RESOLUTION: 8 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_split2_base", "voc_2012_trainval_split2_base") 27 | TEST: ("voc_2007_test_split2_base",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | SOLVER: 31 | BASE_LR: 0.005 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (24000, 32000) 34 | MAX_ITER: 36000 35 | IMS_PER_BATCH: 4 36 | TEST: 37 | IMS_PER_BATCH: 4 38 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/base/e2e_voc_split3_base.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | CLOSEUP_REFINE: False 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | NUM_CLASSES: 16 20 | POOLER_RESOLUTION: 8 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("voc_2007_trainval_split3_base", "voc_2012_trainval_split3_base") 27 | 
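# The *_split3_base datasets keep only annotations for the 15 base classes of
# this split; the 5 novel classes are withheld until few-shot fine-tuning,
# which is why ROI_BOX_HEAD.NUM_CLASSES is 16 (15 base classes + background).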
CLOSEUP: ("voc_2007_trainval_split3_base_closeup", "voc_2012_trainval_split3_base_closeup") 28 | TEST: ("voc_2007_test_split3_base",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (24000, 32000) 35 | MAX_ITER: 36000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_coco_10shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "coco_baseline_base_pretrained.pth" 4 | CLOSEUP_REFINE: False 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | NUM_CLASSES: 81 20 | POOLER_RESOLUTION: 8 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("coco_2014_train_10shot_standard", "coco_2014_val_10shot_standard") 27 | TEST: ("coco_2014_minival",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | SOLVER: 31 | BASE_LR: 0.005 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (2800, 3500) 34 | MAX_ITER: 4000 35 | IMS_PER_BATCH: 4 36 | TEST: 37 | IMS_PER_BATCH: 4 38 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_coco_30shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "coco_baseline_base_pretrained.pth" 4 | CLOSEUP_REFINE: False 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | NUM_CLASSES: 81 20 | POOLER_RESOLUTION: 8 21 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 22 | POOLER_SAMPLING_RATIO: 2 23 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 24 | PREDICTOR: "FPNPredictor" 25 | DATASETS: 26 | TRAIN: ("coco_2014_train_30shot_standard", "coco_2014_val_30shot_standard") 27 | TEST: ("coco_2014_minival",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | SOLVER: 31 | BASE_LR: 0.005 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (5600, 7000) 34 | MAX_ITER: 8000 35 | IMS_PER_BATCH: 4 36 | TEST: 37 | IMS_PER_BATCH: 4 38 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split1_10shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split1base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 
| ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_10shot_standard", "voc_2012_trainval_10shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split1_1shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split1base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_1shot_standard", "voc_2012_trainval_1shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split1_2shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split1base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_2shot_standard", "voc_2012_trainval_2shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split1_3shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: 
"voc0712_baseline_split1base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_3shot_standard", "voc_2012_trainval_3shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split1_5shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split1base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_5shot_standard", "voc_2012_trainval_5shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split2_10shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split2base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_10shot_standard", "voc_2012_trainval_10shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | 
IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split2_1shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split2base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_1shot_standard", "voc_2012_trainval_1shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split2_2shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split2base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_2shot_standard", "voc_2012_trainval_2shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split2_3shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split2base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: 
"FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_3shot_standard", "voc_2012_trainval_3shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split2_5shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split2base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_5shot_standard", "voc_2012_trainval_5shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split3_10shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split3base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_10shot_standard", "voc_2012_trainval_10shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split3_1shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split3base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | 
ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_1shot_standard", "voc_2012_trainval_1shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split3_2shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split3base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_2shot_standard", "voc_2012_trainval_2shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split3_3shot_finetune.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split3base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_3shot_standard", "voc_2012_trainval_3shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/fewshot_baseline/standard/e2e_voc_split3_5shot_finetune.yaml: 
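# The fewshot_baseline configs mirror those under configs/fewshot but appear to
# drop the closeup refinement used there: MODEL.CLOSEUP_REFINE is False and the
# standard fine-tuning configs list no DATASETS.CLOSEUP entry, so training sees
# only the regular N-shot detection data.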
-------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | #WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | WEIGHT: "voc0712_baseline_split3base_pretrained.pth" 5 | CLOSEUP_REFINE: False 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | ROI_BOX_HEAD: 20 | POOLER_RESOLUTION: 8 21 | NUM_CLASSES: 21 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("voc_2007_trainval_5shot_standard", "voc_2012_trainval_5shot_standard") 28 | TEST: ("voc_2007_test",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.005 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (1300, 1700) 35 | MAX_ITER: 2000 36 | IMS_PER_BATCH: 4 37 | TEST: 38 | IMS_PER_BATCH: 4 39 | -------------------------------------------------------------------------------- /configs/gn_baselines/README.md: -------------------------------------------------------------------------------- 1 | ### Group Normalization 2 | 1 [Group Normalization](https://arxiv.org/abs/1803.08494) 3 | 2 [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883) 4 | 3 [official code](https://github.com/facebookresearch/Detectron/blob/master/projects/GN/README.md) 5 | 6 | 7 | ### Performance 8 | | case | Type | lr schd | im/gpu | bbox AP | mask AP | 9 | |----------------------------|:------------:|:---------:|:-------:|:-------:|:-------:| 10 | | R-50-FPN, GN (paper) | finetune | 2x | 2 | 40.3 | 35.7 | 11 | | R-50-FPN, GN (implement) | finetune | 2x | 2 | 40.2 | 36.0 | 12 | | R-50-FPN, GN (paper) | from scratch | 3x | 2 | 39.5 | 35.2 | 13 | | R-50-FPN, GN (implement) | from scratch | 3x | 2 | 38.9 | 35.1 | 14 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | DATASETS: 37 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 38 | TEST: ("coco_2014_minival",) 39 | DATALOADER: 40 | SIZE_DIVISIBILITY: 32 41 | SOLVER: 42 | # Assume 8 gpus 43 | BASE_LR: 0.02 44 | WEIGHT_DECAY: 0.0001 45 | STEPS: (60000, 80000) 
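# MAX_ITER below completes the standard 1x schedule (90k iterations at 16
# images per batch, assuming 8 GPUs as noted above BASE_LR). To inspect how
# this file overlays the defaults, the usual maskrcnn-benchmark yacs pattern
# applies (a minimal sketch, assuming the stock maskrcnn_benchmark.config
# module from upstream):
#   from maskrcnn_benchmark.config import cfg
#   cfg.merge_from_file("configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml")
#   print(cfg.SOLVER.MAX_ITER)  # 90000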
46 | MAX_ITER: 90000 47 | IMS_PER_BATCH: 16 48 | TEST: 49 | IMS_PER_BATCH: 8 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | DATASETS: 39 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 40 | TEST: ("coco_2014_minival",) 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 8 gpus 45 | BASE_LR: 0.02 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (60000, 80000) 48 | MAX_ITER: 90000 49 | IMS_PER_BATCH: 16 50 | TEST: 51 | IMS_PER_BATCH: 8 52 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | ROI_MASK_HEAD: 37 | USE_GN: True # use GN for mask head 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | CONV_LAYERS: (256, 256, 256, 256) 40 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 41 | PREDICTOR: "MaskRCNNC4Predictor" 42 | POOLER_RESOLUTION: 14 43 | POOLER_SAMPLING_RATIO: 2 44 | RESOLUTION: 28 45 | SHARE_BOX_FEATURE_EXTRACTOR: False 46 | MASK_ON: True 47 | DATASETS: 48 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 49 | TEST: ("coco_2014_minival",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | # Assume 8 gpus 54 | BASE_LR: 0.02 55 | WEIGHT_DECAY: 0.0001 56 | STEPS: 
(60000, 80000) 57 | MAX_ITER: 90000 58 | IMS_PER_BATCH: 16 59 | TEST: 60 | IMS_PER_BATCH: 8 61 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | ROI_MASK_HEAD: 39 | USE_GN: True # use GN for mask head 40 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 41 | CONV_LAYERS: (256, 256, 256, 256) 42 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 43 | PREDICTOR: "MaskRCNNC4Predictor" 44 | POOLER_RESOLUTION: 14 45 | POOLER_SAMPLING_RATIO: 2 46 | RESOLUTION: 28 47 | SHARE_BOX_FEATURE_EXTRACTOR: False 48 | MASK_ON: True 49 | DATASETS: 50 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 51 | TEST: ("coco_2014_minival",) 52 | DATALOADER: 53 | SIZE_DIVISIBILITY: 32 54 | SOLVER: 55 | # Assume 8 gpus 56 | BASE_LR: 0.02 57 | WEIGHT_DECAY: 0.0001 58 | STEPS: (60000, 80000) 59 | MAX_ITER: 90000 60 | IMS_PER_BATCH: 16 61 | TEST: 62 | IMS_PER_BATCH: 8 63 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 12 | RESNETS: # use GN for backbone 13 | BACKBONE_OUT_CHANNELS: 256 14 | STRIDE_IN_1X1: False 15 | TRANS_FUNC: "BottleneckWithGN" 16 | STEM_FUNC: "StemWithGN" 17 | FPN: 18 | USE_GN: True # use GN for FPN 19 | RPN: 20 | USE_FPN: True 21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 22 | PRE_NMS_TOP_N_TRAIN: 2000 23 | PRE_NMS_TOP_N_TEST: 1000 24 | POST_NMS_TOP_N_TEST: 1000 25 | FPN_POST_NMS_TOP_N_TEST: 1000 26 | ROI_HEADS: 27 | USE_FPN: True 28 | BATCH_SIZE_PER_IMAGE: 512 29 | POSITIVE_FRACTION: 0.25 30 | ROI_BOX_HEAD: 31 | USE_GN: True # use GN for bbox head 32 | POOLER_RESOLUTION: 7 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | POOLER_SAMPLING_RATIO: 2 35 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 36 | PREDICTOR: "FPNPredictor" 37 | DATASETS: 38 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 39 | TEST: ("coco_2014_minival",) 40 | DATALOADER: 41 | SIZE_DIVISIBILITY: 32 42 | SOLVER: 43 | # Assume 8 gpus 44 | 
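# Scratch training in this config: WEIGHT is empty (no ImageNet initialization)
# and FREEZE_CONV_BODY_AT is 0, so every backbone stage is trained, and the
# schedule below is stretched to 3x (270k iterations) following "Rethinking
# ImageNet Pre-training" referenced in the README above.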
BASE_LR: 0.02 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (210000, 250000) 47 | MAX_ITER: 270000 48 | IMS_PER_BATCH: 16 49 | TEST: 50 | IMS_PER_BATCH: 8 51 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 12 | RESNETS: # use GN for backbone 13 | BACKBONE_OUT_CHANNELS: 256 14 | STRIDE_IN_1X1: False 15 | TRANS_FUNC: "BottleneckWithGN" 16 | STEM_FUNC: "StemWithGN" 17 | FPN: 18 | USE_GN: True # use GN for FPN 19 | RPN: 20 | USE_FPN: True 21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 22 | PRE_NMS_TOP_N_TRAIN: 2000 23 | PRE_NMS_TOP_N_TEST: 1000 24 | POST_NMS_TOP_N_TEST: 1000 25 | FPN_POST_NMS_TOP_N_TEST: 1000 26 | ROI_HEADS: 27 | USE_FPN: True 28 | BATCH_SIZE_PER_IMAGE: 512 29 | POSITIVE_FRACTION: 0.25 30 | ROI_BOX_HEAD: 31 | USE_GN: True # use GN for bbox head 32 | POOLER_RESOLUTION: 7 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | POOLER_SAMPLING_RATIO: 2 35 | CONV_HEAD_DIM: 256 36 | NUM_STACKED_CONVS: 4 37 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 38 | PREDICTOR: "FPNPredictor" 39 | DATASETS: 40 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 41 | TEST: ("coco_2014_minival",) 42 | DATALOADER: 43 | SIZE_DIVISIBILITY: 32 44 | SOLVER: 45 | # Assume 8 gpus 46 | BASE_LR: 0.02 47 | WEIGHT_DECAY: 0.0001 48 | STEPS: (210000, 250000) 49 | MAX_ITER: 270000 50 | IMS_PER_BATCH: 16 51 | TEST: 52 | IMS_PER_BATCH: 8 53 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 12 | RESNETS: # use GN for backbone 13 | BACKBONE_OUT_CHANNELS: 256 14 | STRIDE_IN_1X1: False 15 | TRANS_FUNC: "BottleneckWithGN" 16 | STEM_FUNC: "StemWithGN" 17 | FPN: 18 | USE_GN: True # use GN for FPN 19 | RPN: 20 | USE_FPN: True 21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 22 | PRE_NMS_TOP_N_TRAIN: 2000 23 | PRE_NMS_TOP_N_TEST: 1000 24 | POST_NMS_TOP_N_TEST: 1000 25 | FPN_POST_NMS_TOP_N_TEST: 1000 26 | ROI_HEADS: 27 | USE_FPN: True 28 | BATCH_SIZE_PER_IMAGE: 512 29 | POSITIVE_FRACTION: 0.25 30 | ROI_BOX_HEAD: 31 | USE_GN: True # use GN for bbox head 32 | POOLER_RESOLUTION: 7 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | POOLER_SAMPLING_RATIO: 2 35 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 36 | PREDICTOR: "FPNPredictor" 37 | ROI_MASK_HEAD: 38 | USE_GN: True # use GN for mask head 39 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 40 | CONV_LAYERS: (256, 256, 256, 256) 41 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 42 | PREDICTOR: "MaskRCNNC4Predictor" 43 | POOLER_RESOLUTION: 14 44 | POOLER_SAMPLING_RATIO: 2 45 | RESOLUTION: 28 46 | SHARE_BOX_FEATURE_EXTRACTOR: False 47 | MASK_ON: True 48 | DATASETS: 49 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 50 | TEST: 
("coco_2014_minival",) 51 | DATALOADER: 52 | SIZE_DIVISIBILITY: 32 53 | SOLVER: 54 | # Assume 8 gpus 55 | BASE_LR: 0.02 56 | WEIGHT_DECAY: 0.0001 57 | STEPS: (210000, 250000) 58 | MAX_ITER: 270000 59 | IMS_PER_BATCH: 16 60 | TEST: 61 | IMS_PER_BATCH: 8 62 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 12 | RESNETS: # use GN for backbone 13 | BACKBONE_OUT_CHANNELS: 256 14 | STRIDE_IN_1X1: False 15 | TRANS_FUNC: "BottleneckWithGN" 16 | STEM_FUNC: "StemWithGN" 17 | FPN: 18 | USE_GN: True # use GN for FPN 19 | RPN: 20 | USE_FPN: True 21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 22 | PRE_NMS_TOP_N_TRAIN: 2000 23 | PRE_NMS_TOP_N_TEST: 1000 24 | POST_NMS_TOP_N_TEST: 1000 25 | FPN_POST_NMS_TOP_N_TEST: 1000 26 | ROI_HEADS: 27 | USE_FPN: True 28 | BATCH_SIZE_PER_IMAGE: 512 29 | POSITIVE_FRACTION: 0.25 30 | ROI_BOX_HEAD: 31 | USE_GN: True # use GN for bbox head 32 | POOLER_RESOLUTION: 7 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | POOLER_SAMPLING_RATIO: 2 35 | CONV_HEAD_DIM: 256 36 | NUM_STACKED_CONVS: 4 37 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 38 | PREDICTOR: "FPNPredictor" 39 | ROI_MASK_HEAD: 40 | USE_GN: True # use GN for mask head 41 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 42 | CONV_LAYERS: (256, 256, 256, 256) 43 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 44 | PREDICTOR: "MaskRCNNC4Predictor" 45 | POOLER_RESOLUTION: 14 46 | POOLER_SAMPLING_RATIO: 2 47 | RESOLUTION: 28 48 | SHARE_BOX_FEATURE_EXTRACTOR: False 49 | MASK_ON: True 50 | DATASETS: 51 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 52 | TEST: ("coco_2014_minival",) 53 | DATALOADER: 54 | SIZE_DIVISIBILITY: 32 55 | SOLVER: 56 | # Assume 8 gpus 57 | BASE_LR: 0.02 58 | WEIGHT_DECAY: 0.0001 59 | STEPS: (210000, 250000) 60 | MAX_ITER: 270000 61 | IMS_PER_BATCH: 16 62 | TEST: 63 | IMS_PER_BATCH: 8 64 | -------------------------------------------------------------------------------- /configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 300 7 | ANCHOR_SIZES: (128, 256, 512) 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 21 10 | DATASETS: 11 | TRAIN: ("voc_2007_train", "voc_2007_val") 12 | TEST: ("voc_2007_test",) 13 | SOLVER: 14 | BASE_LR: 0.001 15 | WEIGHT_DECAY: 0.0001 16 | STEPS: (50000, ) 17 | MAX_ITER: 70000 18 | IMS_PER_BATCH: 1 19 | TEST: 20 | IMS_PER_BATCH: 1 21 | -------------------------------------------------------------------------------- /configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 300 7 | ANCHOR_SIZES: (128, 256, 512) 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 21 10 | DATASETS: 11 | TRAIN: ("voc_2007_train", 
"voc_2007_val") 12 | TEST: ("voc_2007_test",) 13 | SOLVER: 14 | BASE_LR: 0.004 15 | WEIGHT_DECAY: 0.0001 16 | STEPS: (12500, ) 17 | MAX_ITER: 17500 18 | IMS_PER_BATCH: 4 19 | TEST: 20 | IMS_PER_BATCH: 4 21 | -------------------------------------------------------------------------------- /configs/pascal_voc/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 21 24 | ROI_MASK_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 27 | PREDICTOR: "MaskRCNNC4Predictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 28 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | MASK_ON: True 33 | DATASETS: 34 | TRAIN: ("voc_2012_train_cocostyle",) 35 | TEST: ("voc_2012_val_cocostyle",) 36 | DATALOADER: 37 | SIZE_DIVISIBILITY: 32 38 | SOLVER: 39 | BASE_LR: 0.01 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (18000,) 42 | MAX_ITER: 24000 43 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | DATASETS: 10 | TRAIN: ("coco_2014_minival",) 11 | TEST: ("coco_2014_minival",) 12 | INPUT: 13 | MIN_SIZE_TRAIN: (600,) 14 | MAX_SIZE_TRAIN: 1000 15 | MIN_SIZE_TEST: 800 16 | MAX_SIZE_TEST: 1000 17 | SOLVER: 18 | BASE_LR: 0.005 19 | WEIGHT_DECAY: 0.0001 20 | STEPS: (1500,) 21 | MAX_ITER: 2000 22 | IMS_PER_BATCH: 2 23 | TEST: 24 | IMS_PER_BATCH: 2 25 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | BATCH_SIZE_PER_IMAGE: 256 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | DATASETS: 25 | TRAIN: ("coco_2014_minival",) 26 | TEST: ("coco_2014_minival",) 27 | INPUT: 28 | MIN_SIZE_TRAIN: (600,) 29 | MAX_SIZE_TRAIN: 1000 30 | MIN_SIZE_TEST: 800 31 | MAX_SIZE_TEST: 1000 32 | DATALOADER: 33 | SIZE_DIVISIBILITY: 32 34 | SOLVER: 
35 | BASE_LR: 0.005 36 | WEIGHT_DECAY: 0.0001 37 | STEPS: (1500,) 38 | MAX_ITER: 2000 39 | IMS_PER_BATCH: 4 40 | TEST: 41 | IMS_PER_BATCH: 2 42 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | BATCH_SIZE_PER_IMAGE: 256 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | DATASETS: 28 | TRAIN: ("coco_2014_minival",) 29 | TEST: ("coco_2014_minival",) 30 | INPUT: 31 | MIN_SIZE_TRAIN: (600,) 32 | MAX_SIZE_TRAIN: 1000 33 | MIN_SIZE_TEST: 800 34 | MAX_SIZE_TEST: 1000 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.005 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (1500,) 41 | MAX_ITER: 2000 42 | IMS_PER_BATCH: 2 43 | TEST: 44 | IMS_PER_BATCH: 2 45 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_keypoint_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | BATCH_SIZE_PER_IMAGE: 256 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | NUM_CLASSES: 2 25 | ROI_KEYPOINT_HEAD: 26 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 27 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor" 28 | PREDICTOR: "KeypointRCNNPredictor" 29 | POOLER_RESOLUTION: 14 30 | POOLER_SAMPLING_RATIO: 2 31 | RESOLUTION: 56 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | KEYPOINT_ON: True 34 | DATASETS: 35 | TRAIN: ("keypoints_coco_2014_minival",) 36 | TEST: ("keypoints_coco_2014_minival",) 37 | INPUT: 38 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 39 | MAX_SIZE_TRAIN: 1000 40 | MIN_SIZE_TEST: 800 41 | MAX_SIZE_TEST: 1000 42 | DATALOADER: 43 | SIZE_DIVISIBILITY: 32 44 | SOLVER: 45 | BASE_LR: 0.005 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (1500,) 48 | MAX_ITER: 2000 49 | IMS_PER_BATCH: 4 50 | TEST: 51 | IMS_PER_BATCH: 2 52 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | 
POST_NMS_TOP_N_TEST: 1000 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | ROI_MASK_HEAD: 10 | PREDICTOR: "MaskRCNNC4Predictor" 11 | SHARE_BOX_FEATURE_EXTRACTOR: True 12 | MASK_ON: True 13 | DATASETS: 14 | TRAIN: ("coco_2014_minival",) 15 | TEST: ("coco_2014_minival",) 16 | INPUT: 17 | MIN_SIZE_TRAIN: (600,) 18 | MAX_SIZE_TRAIN: 1000 19 | MIN_SIZE_TEST: 800 20 | MAX_SIZE_TEST: 1000 21 | SOLVER: 22 | BASE_LR: 0.005 23 | WEIGHT_DECAY: 0.0001 24 | STEPS: (1500,) 25 | MAX_ITER: 2000 26 | IMS_PER_BATCH: 4 27 | TEST: 28 | IMS_PER_BATCH: 2 29 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | BATCH_SIZE_PER_IMAGE: 256 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | ROI_MASK_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 27 | PREDICTOR: "MaskRCNNC4Predictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 28 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | MASK_ON: True 33 | DATASETS: 34 | TRAIN: ("coco_2014_minival",) 35 | TEST: ("coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (600,) 38 | MAX_SIZE_TRAIN: 1000 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1000 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | BASE_LR: 0.005 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (1500,) 47 | MAX_ITER: 2000 48 | IMS_PER_BATCH: 4 49 | TEST: 50 | IMS_PER_BATCH: 2 51 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | BATCH_SIZE_PER_IMAGE: 256 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | ROI_MASK_HEAD: 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2014_minival",) 38 | TEST: ("coco_2014_minival",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (600,) 41 | MAX_SIZE_TRAIN: 1000 42 | 
MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1000 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | BASE_LR: 0.005 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (1500,) 50 | MAX_ITER: 2000 51 | IMS_PER_BATCH: 2 52 | TEST: 53 | IMS_PER_BATCH: 2 54 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 12000 7 | POST_NMS_TOP_N_TEST: 2000 8 | DATASETS: 9 | TRAIN: ("coco_2014_minival",) 10 | TEST: ("coco_2014_minival",) 11 | INPUT: 12 | MIN_SIZE_TRAIN: (600,) 13 | MAX_SIZE_TRAIN: 1000 14 | MIN_SIZE_TEST: 800 15 | MAX_SIZE_TEST: 1000 16 | SOLVER: 17 | BASE_LR: 0.005 18 | WEIGHT_DECAY: 0.0001 19 | STEPS: (1500,) 20 | MAX_ITER: 2000 21 | IMS_PER_BATCH: 4 22 | TEST: 23 | IMS_PER_BATCH: 2 24 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 2000 14 | FPN_POST_NMS_TOP_N_TEST: 2000 15 | DATASETS: 16 | TRAIN: ("coco_2014_minival",) 17 | TEST: ("coco_2014_minival",) 18 | INPUT: 19 | MIN_SIZE_TRAIN: (600,) 20 | MAX_SIZE_TRAIN: 1000 21 | MIN_SIZE_TEST: 800 22 | MAX_SIZE_TEST: 1000 23 | DATALOADER: 24 | SIZE_DIVISIBILITY: 32 25 | SOLVER: 26 | BASE_LR: 0.005 27 | WEIGHT_DECAY: 0.0001 28 | STEPS: (1500,) 29 | MAX_ITER: 2000 30 | IMS_PER_BATCH: 4 31 | TEST: 32 | IMS_PER_BATCH: 2 33 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | FG_IOU_THRESHOLD: 0.5 32 | BG_IOU_THRESHOLD: 0.4 33 | DATASETS: 34 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 35 | TEST: ("coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (800, ) 38 | MAX_SIZE_TRAIN: 1333 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1333 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 4 gpus 45 | BASE_LR: 0.005 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (120000, 160000) 48 | MAX_ITER: 180000 49 | IMS_PER_BATCH: 8 50 | 
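The RetinaNet schedules above only override the defaults in `maskrcnn_benchmark/config/defaults.py`, and the `# Assume 4 gpus` comment means `IMS_PER_BATCH` is the total batch size across all GPUs. Below is a minimal sketch of how such a file is typically consumed through the yacs-based `cfg` object exported by `maskrcnn_benchmark/config/__init__.py`; the config path, the printed values, and the linear learning-rate scaling when halving the batch are illustrative assumptions, not part of the config itself.

```python
from maskrcnn_benchmark.config import cfg  # yacs CfgNode defined in config/defaults.py

# Illustrative path: any of the YAML files in configs/ can be loaded the same way.
config_file = "configs/retinanet/retinanet_R-101-FPN_1x.yaml"

local_cfg = cfg.clone()              # keep the global defaults untouched
local_cfg.merge_from_file(config_file)

print(local_cfg.SOLVER.IMS_PER_BATCH)  # 8  (total across the assumed 4 GPUs)
print(local_cfg.SOLVER.BASE_LR)        # 0.005

# Assumed linear-scaling rule: halve the batch size -> halve the learning rate.
local_cfg.merge_from_list(["SOLVER.IMS_PER_BATCH", 4, "SOLVER.BASE_LR", 0.0025])
local_cfg.freeze()
```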
-------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-101-FPN_P5_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | USE_C5: False 32 | FG_IOU_THRESHOLD: 0.5 33 | BG_IOU_THRESHOLD: 0.4 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | INPUT: 38 | MIN_SIZE_TRAIN: (800, ) 39 | MAX_SIZE_TRAIN: 1333 40 | MIN_SIZE_TEST: 800 41 | MAX_SIZE_TEST: 1333 42 | DATALOADER: 43 | SIZE_DIVISIBILITY: 32 44 | SOLVER: 45 | # Assume 4 gpus 46 | BASE_LR: 0.005 47 | WEIGHT_DECAY: 0.0001 48 | STEPS: (120000, 160000) 49 | MAX_ITER: 180000 50 | IMS_PER_BATCH: 8 51 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | FG_IOU_THRESHOLD: 0.5 32 | BG_IOU_THRESHOLD: 0.4 33 | DATASETS: 34 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 35 | TEST: ("coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (800,) 38 | MAX_SIZE_TRAIN: 1333 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1333 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 4 gpus 45 | BASE_LR: 0.005 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (120000, 160000) 48 | MAX_ITER: 180000 49 | IMS_PER_BATCH: 8 50 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-50-FPN_1x_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 
| FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | FG_IOU_THRESHOLD: 0.5 32 | BG_IOU_THRESHOLD: 0.4 33 | DATASETS: 34 | TRAIN: ("coco_2014_minival",) 35 | TEST: ("coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (600,) 38 | MAX_SIZE_TRAIN: 1000 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1000 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | BASE_LR: 0.005 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (3500,) 47 | MAX_ITER: 4000 48 | IMS_PER_BATCH: 4 49 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-50-FPN_P5_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | USE_C5: False 32 | FG_IOU_THRESHOLD: 0.5 33 | BG_IOU_THRESHOLD: 0.4 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | INPUT: 38 | MIN_SIZE_TRAIN: (800,) 39 | MAX_SIZE_TRAIN: 1333 40 | MIN_SIZE_TEST: 800 41 | MAX_SIZE_TEST: 1333 42 | DATALOADER: 43 | SIZE_DIVISIBILITY: 32 44 | SOLVER: 45 | # Assume 4 gpus 46 | BASE_LR: 0.005 47 | WEIGHT_DECAY: 0.0001 48 | STEPS: (120000, 160000) 49 | MAX_ITER: 180000 50 | IMS_PER_BATCH: 8 51 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | STRIDE_IN_1X1: False 11 | NUM_GROUPS: 32 12 | WIDTH_PER_GROUP: 8 13 | RPN: 14 | USE_FPN: True 15 | FG_IOU_THRESHOLD: 0.5 16 | BG_IOU_THRESHOLD: 0.4 17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 18 | PRE_NMS_TOP_N_TRAIN: 2000 19 | PRE_NMS_TOP_N_TEST: 1000 20 | POST_NMS_TOP_N_TEST: 1000 21 | FPN_POST_NMS_TOP_N_TEST: 1000 22 | ROI_HEADS: 23 | USE_FPN: True 24 | BATCH_SIZE_PER_IMAGE: 256 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: 
"FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | RETINANET: 32 | SCALES_PER_OCTAVE: 3 33 | STRADDLE_THRESH: -1 34 | FG_IOU_THRESHOLD: 0.5 35 | BG_IOU_THRESHOLD: 0.4 36 | DATASETS: 37 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 38 | TEST: ("coco_2014_minival",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800, ) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.0025 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (240000, 320000) 51 | MAX_ITER: 360000 52 | IMS_PER_BATCH: 4 53 | -------------------------------------------------------------------------------- /configs/rpn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 2000 14 | FPN_POST_NMS_TOP_N_TEST: 2000 15 | DATASETS: 16 | TRAIN: ("coco_2014_train",) 17 | TEST: ("coco_2014_val",) 18 | DATALOADER: 19 | SIZE_DIVISIBILITY: 32 20 | SOLVER: 21 | BASE_LR: 0.02 22 | WEIGHT_DECAY: 0.0001 23 | STEPS: (60000, 80000) 24 | MAX_ITER: 90000 25 | -------------------------------------------------------------------------------- /configs/rpn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 12000 7 | POST_NMS_TOP_N_TEST: 2000 8 | DATASETS: 9 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 10 | TEST: ("coco_2014_minival",) 11 | SOLVER: 12 | BASE_LR: 0.02 13 | WEIGHT_DECAY: 0.0001 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | -------------------------------------------------------------------------------- /configs/rpn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 2000 14 | FPN_POST_NMS_TOP_N_TEST: 2000 15 | DATASETS: 16 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 17 | TEST: ("coco_2014_minival",) 18 | DATALOADER: 19 | SIZE_DIVISIBILITY: 32 20 | SOLVER: 21 | BASE_LR: 0.02 22 | WEIGHT_DECAY: 0.0001 23 | STEPS: (60000, 80000) 24 | MAX_ITER: 90000 25 | -------------------------------------------------------------------------------- /configs/rpn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | STRIDE_IN_1X1: False 10 | NUM_GROUPS: 32 11 | WIDTH_PER_GROUP: 8 12 | RPN: 13 | USE_FPN: True 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 2000 17 | FPN_POST_NMS_TOP_N_TEST: 2000 18 | DATASETS: 19 | TRAIN: ("coco_2014_train", 
"coco_2014_valminusminival") 20 | TEST: ("coco_2014_minival",) 21 | DATALOADER: 22 | SIZE_DIVISIBILITY: 32 23 | SOLVER: 24 | BASE_LR: 0.02 25 | WEIGHT_DECAY: 0.0001 26 | STEPS: (60000, 80000) 27 | MAX_ITER: 90000 28 | -------------------------------------------------------------------------------- /configs/test_time_aug/e2e_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 34 | TEST: ("coco_2014_minival",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | TEST: 43 | BBOX_AUG: 44 | ENABLED: True 45 | H_FLIP: True 46 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 47 | MAX_SIZE: 2000 48 | SCALE_H_FLIP: True 49 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | ## Webcam and Jupyter notebook demo 2 | 3 | This folder contains a simple webcam demo that illustrates how you can use `maskrcnn_benchmark` for inference. 
4 | 5 | 6 | ### With your preferred environment 7 | 8 | You can start it by running it from this folder, using one of the following commands: 9 | ```bash 10 | # by default, it runs on the GPU 11 | # for best results, use min-image-size 800 12 | python webcam.py --min-image-size 800 13 | # can also run it on the CPU 14 | python webcam.py --min-image-size 300 MODEL.DEVICE cpu 15 | # or change the model that you want to use 16 | python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu 17 | # in order to see the probability heatmaps, pass --show-mask-heatmaps 18 | python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu 19 | ``` 20 | 21 | ### With Docker 22 | 23 | Build the image with the tag `maskrcnn-benchmark` (check [INSTALL.md](../INSTALL.md) for instructions) 24 | 25 | Adjust permissions of the X server host (be careful with this step, refer to 26 | [here](http://wiki.ros.org/docker/Tutorials/GUI) for alternatives) 27 | 28 | ```bash 29 | xhost + 30 | ``` 31 | 32 | Then run a container with the demo: 33 | 34 | ``` 35 | docker run --rm -it \ 36 | -e DISPLAY=${DISPLAY} \ 37 | --privileged \ 38 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 39 | --device=/dev/video0:/dev/video0 \ 40 | --ipc=host maskrcnn-benchmark \ 41 | python demo/webcam.py --min-image-size 300 \ 42 | --config-file configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml 43 | ``` 44 | 45 | **DISCLAIMER:** *This was tested for an Ubuntu 16.04 machine, 46 | the volume mapping may vary depending on your platform* 47 | -------------------------------------------------------------------------------- /demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiaxi-wu/MPSR/3e4ecdbeb02faf1857ada74858b38187213d676e/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png -------------------------------------------------------------------------------- /demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiaxi-wu/MPSR/3e4ecdbeb02faf1857ada74858b38187213d676e/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png -------------------------------------------------------------------------------- /maskrcnn_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .defaults import _C as cfg 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return
SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | const int sample_per_part, 56 | const float trans_std) 57 | { 58 | if (input.type().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | -------------------------------------------------------------------------------- 
/maskrcnn_benchmark/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "SigmoidFocalLoss.h" 6 | #include "deform_conv.h" 7 | #include "deform_pool.h" 8 | 9 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 10 | m.def("nms", &nms, "non-maximum suppression"); 11 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 12 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 13 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 14 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 15 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 16 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 17 | // dcn-v2 18 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 19 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 20 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 21 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 22 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 23 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 24 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 25 | } -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_data_loader 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.structures.image_list import to_image_list 3 | import torch 4 | 5 | 6 | class BatchCollator(object): 7 | """ 8 | From a list of samples from the dataset, 9 | returns the batched images and targets. 
10 | This should be passed to the DataLoader 11 | """ 12 | 13 | def __init__(self, size_divisible=0): 14 | self.size_divisible = size_divisible 15 | 16 | def __call__(self, batch): 17 | transposed_batch = list(zip(*batch)) 18 | if len(transposed_batch) == 3: 19 | images = to_image_list(transposed_batch[0], self.size_divisible) 20 | targets = transposed_batch[1] 21 | img_ids = transposed_batch[2] 22 | return images, targets, img_ids 23 | else: 24 | images = [torch.stack(image_per_level) for image_per_level in list(zip(*transposed_batch[0]))] 25 | targets = torch.cat(transposed_batch[1]) 26 | return images, targets 27 | 28 | 29 | class BBoxAugCollator(object): 30 | """ 31 | From a list of samples from the dataset, 32 | returns the images and targets. 33 | Images should be converted to batched images in `im_detect_bbox_aug` 34 | """ 35 | 36 | def __call__(self, batch): 37 | return list(zip(*batch)) 38 | 39 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .coco import COCODataset 4 | from .voc import PascalVOCDataset 5 | from .concat_dataset import ConcatDataset 6 | from .abstract import AbstractDataset 7 | from .cityscapes import CityScapesDataset 8 | from .closeup import CloseupDataset 9 | 10 | __all__ = [ 11 | "COCODataset", 12 | "ConcatDataset", 13 | "PascalVOCDataset", 14 | "AbstractDataset", 15 | "CityScapesDataset", 16 | "CloseupDataset" 17 | ] 18 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the sizes of the image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.data import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .voc import voc_evaluation 5 | from .cityscapes import abs_cityscapes_evaluation 6 | 7 | def evaluate(dataset, predictions, output_folder, **kwargs): 8 | """evaluate dataset using different methods based on dataset type. 9 | Args: 10 | dataset: Dataset object 11 | predictions(list[BoxList]): each item in the list represents the 12 | prediction results for one image. 13 | output_folder: output folder, to save evaluation files or results. 14 | **kwargs: other args. 
15 | Returns: 16 | evaluation result 17 | """ 18 | args = dict( 19 | dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs 20 | ) 21 | if isinstance(dataset, datasets.COCODataset): 22 | return coco_evaluation(**args) 23 | elif isinstance(dataset, datasets.PascalVOCDataset): 24 | return voc_evaluation(**args) 25 | elif isinstance(dataset, datasets.AbstractDataset): 26 | return abs_cityscapes_evaluation(**args) 27 | else: 28 | dataset_name = dataset.__class__.__name__ 29 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 30 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/cityscapes/__init__.py: -------------------------------------------------------------------------------- 1 | from .cityscapes_eval import do_cityscapes_evaluation 2 | 3 | 4 | def abs_cityscapes_evaluation( 5 | dataset, 6 | predictions, 7 | box_only, 8 | output_folder, 9 | iou_types, 10 | expected_results, 11 | expected_results_sigma_tol, 12 | ): 13 | return do_cityscapes_evaluation( 14 | dataset=dataset, 15 | predictions=predictions, 16 | box_only=box_only, 17 | output_folder=output_folder, 18 | iou_types=iou_types, 19 | expected_results=expected_results, 20 | expected_results_sigma_tol=expected_results_sigma_tol, 21 | ) 22 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation as do_orig_coco_evaluation 2 | from .coco_eval_wrapper import do_coco_evaluation as do_wrapped_coco_evaluation 3 | from maskrcnn_benchmark.data.datasets import AbstractDataset, COCODataset 4 | 5 | 6 | def coco_evaluation( 7 | dataset, 8 | predictions, 9 | output_folder, 10 | box_only, 11 | iou_types, 12 | expected_results, 13 | expected_results_sigma_tol, 14 | ): 15 | if isinstance(dataset, COCODataset): 16 | return do_orig_coco_evaluation( 17 | dataset=dataset, 18 | predictions=predictions, 19 | box_only=box_only, 20 | output_folder=output_folder, 21 | iou_types=iou_types, 22 | expected_results=expected_results, 23 | expected_results_sigma_tol=expected_results_sigma_tol, 24 | ) 25 | elif isinstance(dataset, AbstractDataset): 26 | return do_wrapped_coco_evaluation( 27 | dataset=dataset, 28 | predictions=predictions, 29 | box_only=box_only, 30 | output_folder=output_folder, 31 | iou_types=iou_types, 32 | expected_results=expected_results, 33 | expected_results_sigma_tol=expected_results_sigma_tol, 34 | ) 35 | else: 36 | raise NotImplementedError( 37 | ( 38 | "Ground truth dataset is not a COCODataset, " 39 | "nor it is derived from AbstractDataset: type(dataset)=" 40 | "%s" % type(dataset) 41 | ) 42 | ) 43 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/coco/coco_eval_wrapper.py: -------------------------------------------------------------------------------- 1 | # COCO style evaluation for custom datasets derived from AbstractDataset 2 | # by botcs@github 3 | 4 | import logging 5 | import os 6 | import json 7 | 8 | from maskrcnn_benchmark.data.datasets.coco import COCODataset 9 | from .coco_eval import do_coco_evaluation as orig_evaluation 10 | from .abs_to_coco import convert_abstract_to_coco 11 | 12 | 13 | def do_coco_evaluation( 14 | dataset, 15 | predictions, 16 | box_only, 17 | output_folder, 18 | iou_types, 19 | 
expected_results, 20 | expected_results_sigma_tol, 21 | ): 22 | 23 | logger = logging.getLogger("maskrcnn_benchmark.inference") 24 | logger.info("Converting annotations to COCO format...") 25 | coco_annotation_dict = convert_abstract_to_coco(dataset) 26 | 27 | dataset_name = dataset.__class__.__name__ 28 | coco_annotation_path = os.path.join(output_folder, dataset_name + ".json") 29 | logger.info("Saving annotations to %s" % coco_annotation_path) 30 | with open(coco_annotation_path, "w") as f: 31 | json.dump(coco_annotation_dict, f, indent=2) 32 | 33 | logger.info("Loading annotations as COCODataset") 34 | coco_dataset = COCODataset( 35 | ann_file=coco_annotation_path, 36 | root="", 37 | remove_images_without_annotations=False, 38 | transforms=None, # transformations should be already saved to the json 39 | ) 40 | 41 | return orig_evaluation( 42 | dataset=coco_dataset, 43 | predictions=predictions, 44 | box_only=box_only, 45 | output_folder=output_folder, 46 | iou_types=iou_types, 47 | expected_results=expected_results, 48 | expected_results_sigma_tol=expected_results_sigma_tol, 49 | ) 50 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .voc_eval import do_voc_evaluation 4 | 5 | 6 | def voc_evaluation(dataset, predictions, output_folder, box_only, **_): 7 | logger = logging.getLogger("maskrcnn_benchmark.inference") 8 | if box_only: 9 | logger.warning("voc evaluation doesn't support box_only, ignored.") 10 | logger.info("performing voc evaluation, ignored iou_types.") 11 | return do_voc_evaluation( 12 | dataset=dataset, 13 | predictions=predictions, 14 | output_folder=output_folder, 15 | logger=logger, 16 | ) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/list_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Simple dataset class that wraps a list of path names 4 | """ 5 | 6 | from PIL import Image 7 | 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList 9 | 10 | 11 | class ListDataset(object): 12 | def __init__(self, image_lists, transforms=None): 13 | self.image_lists = image_lists 14 | self.transforms = transforms 15 | 16 | def __getitem__(self, item): 17 | img = Image.open(self.image_lists[item]).convert("RGB") 18 | 19 | # dummy target 20 | w, h = img.size 21 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 22 | 23 | if self.transforms is not None: 24 | img, target = self.transforms(img, target) 25 | 26 | return img, target 27 | 28 | def __len__(self): 29 | return len(self.image_lists) 30 | 31 | def get_img_info(self, item): 32 | """ 33 | Return the image dimensions for the image, without 34 | loading and pre-processing it 35 | """ 36 | pass 37 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | from .transforms import RandomCrop 8 | 9 | from .build import build_transforms, build_closeup_transforms 10 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d 5 | from .misc import Conv2d 6 | from .misc import DFConv2d 7 | from .misc import ConvTranspose2d 8 | from .misc import BatchNorm2d 9 | from .misc import interpolate 10 | from .nms import nms 11 | from .roi_align import ROIAlign 12 | from .roi_align import roi_align 13 | from .roi_pool import ROIPool 14 | from .roi_pool import roi_pool 15 | from .smooth_l1_loss import smooth_l1_loss 16 | from .sigmoid_focal_loss import SigmoidFocalLoss 17 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 18 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, ModulatedDeformConvPack 19 | from .dcn.deform_pool_func import deform_roi_pooling 20 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack 21 | 22 | 23 | __all__ = [ 24 | "nms", 25 | "roi_align", 26 | "ROIAlign", 27 | "roi_pool", 28 | "ROIPool", 29 | "smooth_l1_loss", 30 | "Conv2d", 31 | "DFConv2d", 32 | "ConvTranspose2d", 33 | "interpolate", 34 | "BatchNorm2d", 35 | "FrozenBatchNorm2d", 36 | "SigmoidFocalLoss", 37 | 'deform_conv', 38 | 'modulated_deform_conv', 39 | 'DeformConv', 40 | 'ModulatedDeformConv', 41 | 'ModulatedDeformConvPack', 42 | 'deform_roi_pooling', 43 | 'DeformRoIPooling', 44 | 'DeformRoIPoolingPack', 45 | 'ModulatedDeformRoIPoolingPack', 46 | ] 47 | 48 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import glob 3 | import os.path 4 | 5 | import torch 6 | 7 | try: 8 | from torch.utils.cpp_extension import load as load_ext 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | except ImportError: 11 | raise ImportError("The cpp layer extensions requires PyTorch 0.4 or higher") 12 | 13 | 14 | def _load_C_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | this_dir = os.path.dirname(this_dir) 17 | this_dir = os.path.join(this_dir, "csrc") 18 | 19 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 22 | 23 | source = main_file + source_cpu 24 | 25 | extra_cflags = [] 26 | if torch.cuda.is_available() and CUDA_HOME is not None: 27 | source.extend(source_cuda) 28 | extra_cflags = ["-DWITH_CUDA"] 29 | source = [os.path.join(this_dir, s) for s in source] 30 | extra_include_paths = [this_dir] 31 | return load_ext( 32 | "torchvision", 33 | source, 34 | extra_cflags=extra_cflags, 35 | extra_include_paths=extra_include_paths, 36 | ) 37 | 38 | 39 | _C = _load_C_extensions() 40 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/batch_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | from torch import nn 4 | 5 | 6 | class FrozenBatchNorm2d(nn.Module): 7 | """ 8 | BatchNorm2d where the batch statistics and the affine parameters 9 | are fixed 10 | """ 11 | 12 | def __init__(self, n): 13 | super(FrozenBatchNorm2d, self).__init__() 14 | self.register_buffer("weight", torch.ones(n)) 15 | self.register_buffer("bias", torch.zeros(n)) 16 | self.register_buffer("running_mean", torch.zeros(n)) 17 | self.register_buffer("running_var", torch.ones(n)) 18 | 19 | def forward(self, x): 20 | # Cast all fixed parameters to half() if necessary 21 | if x.dtype == torch.float16: 22 | self.weight = self.weight.half() 23 | self.bias = self.bias.half() 24 | self.running_mean = self.running_mean.half() 25 | self.running_var = self.running_var.half() 26 | 27 | scale = self.weight * self.running_var.rsqrt() 28 | bias = self.bias - self.running_mean * scale 29 | scale = scale.reshape(1, -1, 1, 1) 30 | bias = bias.reshape(1, -1, 1, 1) 31 | return x * scale + bias 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn) 3 | # -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from maskrcnn_benchmark import _C 4 | 5 | from apex import amp 6 | 7 | # Only valid with fp32 inputs - give AMP the hint 8 | nms = amp.float_function(_C.nms) 9 | 10 | # nms.__doc__ = """ 11 | # This function performs Non-maximum suppresion""" 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # TODO maybe push this to nn? 6 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 7 | """ 8 | very similar to the smooth_l1_loss from pytorch, but with 9 | the extra beta parameter 10 | """ 11 | n = torch.abs(input - target) 12 | cond = n < beta 13 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 14 | if size_average: 15 | return loss.mean() 16 | return loss.sum() 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiaxi-wu/MPSR/3e4ecdbeb02faf1857ada74858b38187213d676e/maskrcnn_benchmark/modeling/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .backbone import build_backbone 3 | from . import fbnet 4 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved. 2 | from .detectors import build_detection_model 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/detectors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .generalized_rcnn import GeneralizedRCNN 3 | 4 | 5 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN} 6 | 7 | 8 | def build_detection_model(cfg): 9 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 10 | return meta_arch(cfg) 11 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from maskrcnn_benchmark.utils.registry import Registry 4 | 5 | BACKBONES = Registry() 6 | RPN_HEADS = Registry() 7 | ROI_BOX_FEATURE_EXTRACTORS = Registry() 8 | ROI_BOX_PREDICTOR = Registry() 9 | ROI_KEYPOINT_FEATURE_EXTRACTORS = Registry() 10 | ROI_KEYPOINT_PREDICTOR = Registry() 11 | ROI_MASK_FEATURE_EXTRACTORS = Registry() 12 | ROI_MASK_PREDICTOR = Registry() 13 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiaxi-wu/MPSR/3e4ecdbeb02faf1857ada74858b38187213d676e/maskrcnn_benchmark/modeling/roi_heads/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiaxi-wu/MPSR/3e4ecdbeb02faf1857ada74858b38187213d676e/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiaxi-wu/MPSR/3e4ecdbeb02faf1857ada74858b38187213d676e/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from maskrcnn_benchmark.modeling import registry 5 | from maskrcnn_benchmark.modeling.poolers import Pooler 6 | 7 | from maskrcnn_benchmark.layers import Conv2d 8 | 9 | 10 | @registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor") 11 | class KeypointRCNNFeatureExtractor(nn.Module): 12 | def __init__(self, cfg, in_channels): 13 | super(KeypointRCNNFeatureExtractor, self).__init__() 14 | 15 | resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION 16 | scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES 17 | sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO 18 | pooler = Pooler( 19 | output_size=(resolution, resolution), 20 | scales=scales, 21 | sampling_ratio=sampling_ratio, 22 | ) 23 | self.pooler = pooler 24 | 25 | input_features = in_channels 26 | layers = 
cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS 27 | next_feature = input_features 28 | self.blocks = [] 29 | for layer_idx, layer_features in enumerate(layers, 1): 30 | layer_name = "conv_fcn{}".format(layer_idx) 31 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 32 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 33 | nn.init.constant_(module.bias, 0) 34 | self.add_module(layer_name, module) 35 | next_feature = layer_features 36 | self.blocks.append(layer_name) 37 | self.out_channels = layer_features 38 | 39 | def forward(self, x, proposals): 40 | x = self.pooler(x, proposals) 41 | for layer_name in self.blocks: 42 | x = F.relu(getattr(self, layer_name)(x)) 43 | return x 44 | 45 | 46 | def make_roi_keypoint_feature_extractor(cfg, in_channels): 47 | func = registry.ROI_KEYPOINT_FEATURE_EXTRACTORS[ 48 | cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR 49 | ] 50 | return func(cfg, in_channels) 51 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from maskrcnn_benchmark import layers 4 | from maskrcnn_benchmark.modeling import registry 5 | 6 | 7 | @registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor") 8 | class KeypointRCNNPredictor(nn.Module): 9 | def __init__(self, cfg, in_channels): 10 | super(KeypointRCNNPredictor, self).__init__() 11 | input_features = in_channels 12 | num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES 13 | deconv_kernel = 4 14 | self.kps_score_lowres = layers.ConvTranspose2d( 15 | input_features, 16 | num_keypoints, 17 | deconv_kernel, 18 | stride=2, 19 | padding=deconv_kernel // 2 - 1, 20 | ) 21 | nn.init.kaiming_normal_( 22 | self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu" 23 | ) 24 | nn.init.constant_(self.kps_score_lowres.bias, 0) 25 | self.up_scale = 2 26 | self.out_channels = num_keypoints 27 | 28 | def forward(self, x): 29 | x = self.kps_score_lowres(x) 30 | x = layers.interpolate( 31 | x, scale_factor=self.up_scale, mode="bilinear", align_corners=False 32 | ) 33 | return x 34 | 35 | 36 | def make_roi_keypoint_predictor(cfg, in_channels): 37 | func = registry.ROI_KEYPOINT_PREDICTOR[cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR] 38 | return func(cfg, in_channels) 39 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiaxi-wu/MPSR/3e4ecdbeb02faf1857ada74858b38187213d676e/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiaxi-wu/MPSR/3e4ecdbeb02faf1857ada74858b38187213d676e/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_optimizer 3 | from .build import make_lr_scheduler 4 | from .lr_scheduler import WarmupMultiStepLR 5 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .lr_scheduler import WarmupMultiStepLR 5 | 6 | 7 | def make_optimizer(cfg, model): 8 | params = [] 9 | for key, value in model.named_parameters(): 10 | if not value.requires_grad: 11 | continue 12 | lr = cfg.SOLVER.BASE_LR 13 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 14 | if "bias" in key: 15 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 16 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 17 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 18 | 19 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM) 20 | return optimizer 21 | 22 | 23 | def make_lr_scheduler(cfg, optimizer): 24 | return WarmupMultiStepLR( 25 | optimizer, 26 | cfg.SOLVER.STEPS, 27 | cfg.SOLVER.GAMMA, 28 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 29 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 30 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 31 | ) 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | 4 | import torch 5 | 6 | 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 8 | # separating MultiStepLR with WarmupLR 9 | # but the current LRScheduler design doesn't allow it 10 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer, 14 | milestones, 15 | gamma=0.1, 16 | warmup_factor=1.0 / 3, 17 | warmup_iters=500, 18 | warmup_method="linear", 19 | last_epoch=-1, 20 | ): 21 | if not list(milestones) == sorted(milestones): 22 | raise ValueError( 23 | "Milestones should be a list of" " increasing integers. 
Got {}", 24 | milestones, 25 | ) 26 | 27 | if warmup_method not in ("constant", "linear"): 28 | raise ValueError( 29 | "Only 'constant' or 'linear' warmup_method accepted" 30 | "got {}".format(warmup_method) 31 | ) 32 | self.milestones = milestones 33 | self.gamma = gamma 34 | self.warmup_factor = warmup_factor 35 | self.warmup_iters = warmup_iters 36 | self.warmup_method = warmup_method 37 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 38 | 39 | def get_lr(self): 40 | warmup_factor = 1 41 | if self.last_epoch < self.warmup_iters: 42 | if self.warmup_method == "constant": 43 | warmup_factor = self.warmup_factor 44 | elif self.warmup_method == "linear": 45 | alpha = float(self.last_epoch) / self.warmup_iters 46 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 47 | return [ 48 | base_lr 49 | * warmup_factor 50 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 51 | for base_lr in self.base_lrs 52 | ] 53 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiaxi-wu/MPSR/3e4ecdbeb02faf1857ada74858b38187213d676e/maskrcnn_benchmark/structures/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contain utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiaxi-wu/MPSR/3e4ecdbeb02faf1857ada74858b38187213d676e/maskrcnn_benchmark/utils/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import PIL 3 | 4 | from torch.utils.collect_env import get_pretty_env_info 5 | 6 | 7 | def get_pil_version(): 8 | return "\n Pillow ({})".format(PIL.__version__) 9 | 10 | 11 | def collect_env_info(): 12 | env_str = get_pretty_env_info() 13 | env_str += get_pil_version() 14 | return env_str 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for cv2 utility functions and maintaining version compatibility 3 | between 3.x and 4.x 4 | """ 5 | import cv2 6 | 7 | 8 | def findContours(*args, **kwargs): 9 | """ 10 | Wraps cv2.findContours to maintain compatiblity between versions 11 | 3 and 4 12 | 13 | Returns: 14 | contours, hierarchy 15 | """ 16 | if cv2.__version__.startswith('4'): 17 | contours, hierarchy = cv2.findContours(*args, **kwargs) 18 | elif cv2.__version__.startswith('3'): 19 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 20 | else: 21 | raise AssertionError( 22 | 'cv2 must be either version 3 or 4 to call this method') 23 | 24 | return contours, hierarchy 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | 4 | from maskrcnn_benchmark.utils.imports import import_file 5 | 6 | 7 | def setup_environment(): 8 | """Perform environment setup work. The default setup is a no-op, but this 9 | function allows the user to specify a Python source file that performs 10 | custom setup work that may be necessary to their computing environment. 11 | """ 12 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 13 | if custom_module_path: 14 | setup_custom_environment(custom_module_path) 15 | else: 16 | # The default setup is a no-op 17 | pass 18 | 19 | 20 | def setup_custom_environment(custom_module_path): 21 | """Load custom environment setup from a Python source file and run the setup 22 | function. 23 | """ 24 | module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) 25 | assert hasattr(module, "setup_environment") and callable( 26 | module.setup_environment 27 | ), ( 28 | "Custom environment module defined in {} does not have the " 29 | "required callable attribute 'setup_environment'." 30 | ).format( 31 | custom_module_path 32 | ) 33 | module.setup_environment() 34 | 35 | 36 | # Force environment setup when this module is imported 37 | setup_environment() 38 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/imports.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | 4 | if torch._six.PY3: 5 | import importlib 6 | import importlib.util 7 | import sys 8 | 9 | 10 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 11 | def import_file(module_name, file_path, make_importable=False): 12 | spec = importlib.util.spec_from_file_location(module_name, file_path) 13 | module = importlib.util.module_from_spec(spec) 14 | spec.loader.exec_module(module) 15 | if make_importable: 16 | sys.modules[module_name] = module 17 | return module 18 | else: 19 | import imp 20 | 21 | def import_file(module_name, file_path, make_importable=None): 22 | module = imp.load_source(module_name, file_path) 23 | return module 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | 7 | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | ch = logging.StreamHandler(stream=sys.stdout) 14 | ch.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 16 | ch.setFormatter(formatter) 17 | logger.addHandler(ch) 18 | 19 | if save_dir: 20 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 21 | fh.setLevel(logging.DEBUG) 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | 25 | return logger 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | 7 | 8 | class SmoothedValue(object): 9 | """Track a series of values and provide access to smoothed values over a 10 | window or the global series average. 
11 | """ 12 | 13 | def __init__(self, window_size=20): 14 | self.deque = deque(maxlen=window_size) 15 | self.series = [] 16 | self.total = 0.0 17 | self.count = 0 18 | 19 | def update(self, value): 20 | self.deque.append(value) 21 | self.series.append(value) 22 | self.count += 1 23 | self.total += value 24 | 25 | @property 26 | def median(self): 27 | d = torch.tensor(list(self.deque)) 28 | return d.median().item() 29 | 30 | @property 31 | def avg(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.mean().item() 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger(object): 41 | def __init__(self, delimiter="\t"): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | if attr in self.__dict__: 56 | return self.__dict__[attr] 57 | raise AttributeError("'{}' object has no attribute '{}'".format( 58 | type(self).__name__, attr)) 59 | 60 | def __str__(self): 61 | loss_str = [] 62 | for name, meter in self.meters.items(): 63 | loss_str.append( 64 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 65 | ) 66 | return self.delimiter.join(loss_str) 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import os 4 | 5 | 6 | def mkdir(path): 7 | try: 8 | os.makedirs(path) 9 | except OSError as e: 10 | if e.errno != errno.EEXIST: 11 | raise 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | def _register_generic(module_dict, module_name, module): 5 | assert module_name not in module_dict 6 | module_dict[module_name] = module 7 | 8 | 9 | class Registry(dict): 10 | ''' 11 | A helper class for managing registering modules, it extends a dictionary 12 | and provides a register functions. 13 | 14 | Eg. creeting a registry: 15 | some_registry = Registry({"default": default_module}) 16 | 17 | There're two ways of registering new modules: 18 | 1): normal way is just calling register function: 19 | def foo(): 20 | ... 21 | some_registry.register("foo_module", foo) 22 | 2): used as decorator when declaring the module: 23 | @some_registry.register("foo_module") 24 | @some_registry.register("foo_modeul_nickname") 25 | def foo(): 26 | ... 
27 | 28 | Access of module is just like using a dictionary, eg: 29 | f = some_registry["foo_modeul"] 30 | ''' 31 | def __init__(self, *args, **kwargs): 32 | super(Registry, self).__init__(*args, **kwargs) 33 | 34 | def register(self, module_name, module=None): 35 | # used as function call 36 | if module is not None: 37 | _register_generic(self, module_name, module) 38 | return 39 | 40 | # used as decorator 41 | def register_fn(fn): 42 | _register_generic(self, module_name, fn) 43 | return fn 44 | 45 | return register_fn 46 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | import time 5 | import datetime 6 | 7 | 8 | class Timer(object): 9 | def __init__(self): 10 | self.reset() 11 | 12 | @property 13 | def average_time(self): 14 | return self.total_time / self.calls if self.calls > 0 else 0.0 15 | 16 | def tic(self): 17 | # using time.time instead of time.clock because time time.clock 18 | # does not normalize for multithreading 19 | self.start_time = time.time() 20 | 21 | def toc(self, average=True): 22 | self.add(time.time() - self.start_time) 23 | if average: 24 | return self.average_time 25 | else: 26 | return self.diff 27 | 28 | def add(self, time_diff): 29 | self.diff = time_diff 30 | self.total_time += self.diff 31 | self.calls += 1 32 | 33 | def reset(self): 34 | self.total_time = 0.0 35 | self.calls = 0 36 | self.start_time = 0.0 37 | self.diff = 0.0 38 | 39 | def avg_time_str(self): 40 | time_str = str(datetime.timedelta(seconds=self.average_time)) 41 | return time_str 42 | 43 | 44 | def get_time_str(time_diff): 45 | time_str = str(datetime.timedelta(seconds=time_diff)) 46 | return time_str 47 | -------------------------------------------------------------------------------- /tests/env_tests/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import os 4 | 5 | 6 | def get_config_root_path(): 7 | ''' Path to configs for unit tests ''' 8 | # cur_file_dir is root/tests/env_tests 9 | cur_file_dir = os.path.dirname(os.path.abspath(os.path.realpath(__file__))) 10 | ret = os.path.dirname(os.path.dirname(cur_file_dir)) 11 | ret = os.path.join(ret, "configs") 12 | return ret 13 | -------------------------------------------------------------------------------- /tests/test_backbones.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | import unittest 4 | import copy 5 | import torch 6 | # import modules to to register backbones 7 | from maskrcnn_benchmark.modeling.backbone import build_backbone # NoQA 8 | from maskrcnn_benchmark.modeling import registry 9 | from maskrcnn_benchmark.config import cfg as g_cfg 10 | from utils import load_config 11 | 12 | 13 | # overwrite configs if specified, otherwise default config is used 14 | BACKBONE_CFGS = { 15 | "R-50-FPN": "e2e_faster_rcnn_R_50_FPN_1x.yaml", 16 | "R-101-FPN": "e2e_faster_rcnn_R_101_FPN_1x.yaml", 17 | "R-152-FPN": "e2e_faster_rcnn_R_101_FPN_1x.yaml", 18 | "R-50-FPN-RETINANET": "retinanet/retinanet_R-50-FPN_1x.yaml", 19 | "R-101-FPN-RETINANET": "retinanet/retinanet_R-101-FPN_1x.yaml", 20 | } 21 | 22 | 23 | class TestBackbones(unittest.TestCase): 24 | def test_build_backbones(self): 25 | ''' Make sure backbones run ''' 26 | 27 | self.assertGreater(len(registry.BACKBONES), 0) 28 | 29 | for name, backbone_builder in registry.BACKBONES.items(): 30 | print('Testing {}...'.format(name)) 31 | if name in BACKBONE_CFGS: 32 | cfg = load_config(BACKBONE_CFGS[name]) 33 | else: 34 | # Use default config if config file is not specified 35 | cfg = copy.deepcopy(g_cfg) 36 | backbone = backbone_builder(cfg) 37 | 38 | # make sures the backbone has `out_channels` 39 | self.assertIsNotNone( 40 | getattr(backbone, 'out_channels', None), 41 | 'Need to provide out_channels for backbone {}'.format(name) 42 | ) 43 | 44 | N, C_in, H, W = 2, 3, 224, 256 45 | input = torch.rand([N, C_in, H, W], dtype=torch.float32) 46 | out = backbone(input) 47 | for cur_out in out: 48 | self.assertEqual( 49 | cur_out.shape[:2], 50 | torch.Size([N, backbone.out_channels]) 51 | ) 52 | 53 | 54 | if __name__ == "__main__": 55 | unittest.main() 56 | -------------------------------------------------------------------------------- /tests/test_configs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | import glob 5 | import os 6 | import utils 7 | 8 | 9 | class TestConfigs(unittest.TestCase): 10 | def test_configs_load(self): 11 | ''' Make sure configs are loadable ''' 12 | 13 | cfg_root_path = utils.get_config_root_path() 14 | files = glob.glob( 15 | os.path.join(cfg_root_path, "./**/*.yaml"), recursive=True) 16 | self.assertGreater(len(files), 0) 17 | 18 | for fn in files: 19 | print('Loading {}...'.format(fn)) 20 | utils.load_config_from_file(fn) 21 | 22 | 23 | if __name__ == "__main__": 24 | unittest.main() 25 | -------------------------------------------------------------------------------- /tests/test_metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import unittest 3 | 4 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 5 | 6 | 7 | class TestMetricLogger(unittest.TestCase): 8 | def test_update(self): 9 | meter = MetricLogger() 10 | for i in range(10): 11 | meter.update(metric=float(i)) 12 | 13 | m = meter.meters["metric"] 14 | self.assertEqual(m.count, 10) 15 | self.assertEqual(m.total, 45) 16 | self.assertEqual(m.median, 4) 17 | self.assertEqual(m.avg, 4.5) 18 | 19 | def test_no_attr(self): 20 | meter = MetricLogger() 21 | _ = meter.meters 22 | _ = meter.delimiter 23 | def broken(): 24 | _ = meter.not_existent 25 | self.assertRaises(AttributeError, broken) 26 | 27 | if __name__ == "__main__": 28 | unittest.main() 29 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function, unicode_literals 2 | 3 | # Set up custom environment before nearly anything else is imported 4 | # NOTE: this should be the first import (no not reorder) 5 | from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip 6 | import env_tests.env as env_tests 7 | 8 | import os 9 | import copy 10 | 11 | from maskrcnn_benchmark.config import cfg as g_cfg 12 | 13 | 14 | def get_config_root_path(): 15 | return env_tests.get_config_root_path() 16 | 17 | 18 | def load_config(rel_path): 19 | ''' Load config from file path specified as path relative to config_root ''' 20 | cfg_path = os.path.join(env_tests.get_config_root_path(), rel_path) 21 | return load_config_from_file(cfg_path) 22 | 23 | 24 | def load_config_from_file(file_path): 25 | ''' Load config from file path specified as absolute path ''' 26 | ret = copy.deepcopy(g_cfg) 27 | ret.merge_from_file(file_path) 28 | return ret 29 | -------------------------------------------------------------------------------- /tools/fewshot_exp/MPSR_arch.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiaxi-wu/MPSR/3e4ecdbeb02faf1857ada74858b38187213d676e/tools/fewshot_exp/MPSR_arch.jpg -------------------------------------------------------------------------------- /tools/fewshot_exp/cal_novel_coco.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.data.datasets.closeup import CloseupDataset 2 | import sys 3 | all_cls = CloseupDataset.CLASSES_COCO[1:] 4 | #novel_cls = [c for c in CloseupDataset.CLASSES_COCO if c not in CloseupDataset.CLASSES_COCO_BASE] 5 | novel_cls = CloseupDataset.CLASSES_COCO_NOVEL 6 | novel_idx = [all_cls.index(i) for i in novel_cls] 7 | for shot in [10, 30]: 8 | novel_mmap = [0] * 20 9 | novel_map = [0] * 20 10 | try: 11 | with open(sys.argv[1] + '/result_%dshot.txt'%shot) as f: 12 | results = f.readlines() 13 | #mmap 14 | mmaps = results[2: 82] 15 | mmaps = [float(i.strip()) for i in mmaps] 16 | for i, j in enumerate(novel_idx): 17 | novel_mmap[i] += mmaps[j] 18 | #map 19 | maps = results[83: 163] 20 | maps = [float(i.strip()) for i in maps] 21 | for i, j in enumerate(novel_idx): 22 | novel_map[i] += maps[j] 23 | print('result of %d shot:'%shot) 24 | print('novel mmap:%.4f'%(sum(novel_mmap) / 20)) 25 | print(dict(zip(novel_cls, novel_mmap))) 26 | print('novel map:%.4f'%(sum(novel_map) / 20)) 27 | print(dict(zip(novel_cls, novel_map))) 28 | print('') 29 | except Exception as e: 30 | print('result file: %d shot not found'%shot) 31 | continue 32 | 
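For reference, the novel-class aggregation in cal_novel_coco.py above (and cal_novel_voc.py below) reduces to picking the novel-class entries out of a per-class AP list and averaging them. The following is a minimal, self-contained sketch of that selection step; the class names and AP values are illustrative placeholders, not results from this repository.

# Sketch of the novel-class mAP aggregation used by the cal_novel_* scripts.
# Class names and AP values are made-up placeholders for illustration only.
all_classes = ["person", "car", "bird", "cat", "bottle"]
novel_classes = ["bird", "bottle"]
per_class_ap = [0.71, 0.68, 0.42, 0.55, 0.38]  # one AP per entry in all_classes

novel_idx = [all_classes.index(c) for c in novel_classes]
novel_ap = [per_class_ap[i] for i in novel_idx]
print("novel mAP: %.4f" % (sum(novel_ap) / len(novel_ap)))
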
-------------------------------------------------------------------------------- /tools/fewshot_exp/cal_novel_voc.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.data.datasets.voc import PascalVOCDataset 2 | import sys 3 | # arg 1: pathdir; 4 | for split in range(1, 4): 5 | for shot in [1, 2, 3, 5, 10]: 6 | all_cls = PascalVOCDataset.CLASSES 7 | novel_cls = [PascalVOCDataset.CLASSES_SPLIT1_NOVEL, 8 | PascalVOCDataset.CLASSES_SPLIT2_NOVEL, 9 | PascalVOCDataset.CLASSES_SPLIT3_NOVEL,][split - 1] 10 | novel_index = [all_cls.index(c) for c in novel_cls] 11 | AP = [0] * 5 12 | try: 13 | with open(sys.argv[1] + '/result_split%d_%dshot.txt'%(split, shot), 'r') as f: 14 | content = f.readlines() 15 | for k, j in enumerate(novel_index): 16 | AP[k] += float(content[j][18 : 24]) 17 | print("VOC split%d %2dshot:novel map:%.4f"%(split, shot, sum(AP) / 5)) 18 | except Exception as e: 19 | continue 20 | -------------------------------------------------------------------------------- /tools/fewshot_exp/crops/create_crops_coco_base.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | from maskrcnn_benchmark.data.datasets.coco import COCODataset 3 | from maskrcnn_benchmark.data.datasets.closeup import CloseupDataset 4 | import os, shutil 5 | #crop the object from original image and save them in original shape 6 | #save object under categorized folders 7 | 8 | 9 | #here we do not crop the original size, but crop a (8 / 7) larger closeup 10 | def get_closeup(image, target): 11 | closeup = [] 12 | closeup_target = target.get_field('labels').tolist() 13 | for t in range(len(target)): 14 | x1, y1, x2, y2 = target.bbox[t].tolist() 15 | if min(x2 - x1, y2 - y1) < 8: 16 | continue 17 | cutsize = max(x2 - x1, y2 - y1) * 8 / 7 / 2 18 | midx = (x1 + x2) / 2 19 | midy = (y1 + y2) / 2 20 | crop_img = image.crop((int(midx - cutsize), int(midy - cutsize), int(midx + cutsize), int(midy + cutsize))) 21 | closeup.append(crop_img) 22 | return closeup, closeup_target 23 | 24 | 25 | imgdirs = ['datasets/coco/train2014', 'datasets/coco/val2014'] 26 | annofiles = ["datasets/coco/annotations/instances_train2014_base.json", "datasets/coco/annotations/instances_valminusminival2014_base.json"] 27 | if not os.path.exists('datasets/coco/Crops'): 28 | os.mkdir('datasets/coco/Crops') 29 | else: 30 | shutil.rmtree('datasets/coco/Crops') 31 | os.mkdir('datasets/coco/Crops') 32 | for cls in CloseupDataset.CLASSES_COCO_BASE: 33 | os.mkdir('datasets/coco/Crops/' + cls) 34 | cls_count = {cls: 0 for cls in CloseupDataset.CLASSES_COCO_BASE} 35 | for s in range(2): 36 | dataset = COCODataset(annofiles[s], imgdirs[s], True) 37 | for index in range(len(dataset)): 38 | img, annos, _ = dataset.__getitem__(index) 39 | crops, crop_labels = get_closeup(img, annos) 40 | for crop, label in list(zip(crops, crop_labels)): 41 | label = CloseupDataset.CLASSES_COCO_BASE[label] 42 | cls_count[label] += 1 43 | crop.save('datasets/coco/Crops/%s/%d.jpg'%(label, cls_count[label])) 44 | print(cls_count) 45 | print('crop amount:%d'%sum(list(cls_count.values()))) 46 | -------------------------------------------------------------------------------- /tools/fewshot_exp/crops/create_crops_voc_base.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | from maskrcnn_benchmark.data.datasets.voc import PascalVOCDataset 3 | import os, shutil 4 | import sys 5 | #crop the object from original image and 
save them in original shape 6 | #save object under categorized folders 7 | split = int(sys.argv[1]) 8 | 9 | 10 | #here we do not crop the original size, but crop a (8 / 7) larger closeup 11 | def get_closeup(image, target): 12 | closeup = [] 13 | closeup_target = target.get_field('labels').tolist() 14 | for t in range(len(target)): 15 | x1, y1, x2, y2 = target.bbox[t].tolist() 16 | cutsize = max(x2 - x1, y2 - y1) * 8 / 7 / 2 17 | midx = (x1 + x2) / 2 18 | midy = (y1 + y2) / 2 19 | crop_img = image.crop((int(midx - cutsize), int(midy - cutsize), int(midx + cutsize), int(midy + cutsize))) 20 | closeup.append(crop_img) 21 | return closeup, closeup_target 22 | 23 | 24 | datadirs = ['datasets/voc/VOC2007', 'datasets/voc/VOC2012'] 25 | splits = ['trainval_split%d_base'%split, 'trainval_split%d_base'%split] 26 | for s in range(2): 27 | dataset = PascalVOCDataset(datadirs[s], splits[s]) 28 | if not os.path.exists(datadirs[s] + '/Crops'): 29 | os.mkdir(datadirs[s] + '/Crops') 30 | else: 31 | shutil.rmtree(datadirs[s] + '/Crops') 32 | os.mkdir(datadirs[s] + '/Crops') 33 | for cls in PascalVOCDataset.CLASSES[1:]: 34 | os.mkdir(datadirs[s] + '/Crops/' + cls) 35 | cls_count = {cls: 0 for cls in PascalVOCDataset.CLASSES} 36 | for index in range(len(dataset)): 37 | img_id = dataset.ids[index] 38 | img = Image.open(datadirs[s] + '/JPEGImages/%s.jpg'%img_id).convert("RGB") 39 | annos = dataset.get_groundtruth(index) 40 | crops, crop_labels = get_closeup(img, annos) 41 | for crop, label in list(zip(crops, crop_labels)): 42 | #label = PascalVOCDataset.CLASSES[label] 43 | label = dataset.categories[label] 44 | cls_count[label] += 1 45 | crop.save(datadirs[s] + '/Crops/%s/%d.jpg'%(label, cls_count[label])) 46 | print(cls_count) 47 | print('crop amount:%d'%sum(list(cls_count.values()))) 48 | -------------------------------------------------------------------------------- /tools/fewshot_exp/datasets/init_fs_dataset_standard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #data split from code imp for "Few-shot Object Detection via Feature Reweighting, ICCV 2019" 3 | git clone https://github.com/bingykang/Fewshot_Detection.git ../Fewshot_Detection 4 | #init base/novel sets for fewshot exps 5 | python tools/fewshot_exp/datasets/voc_create_base.py 6 | python tools/fewshot_exp/datasets/voc_create_standard.py 7 | python tools/fewshot_exp/datasets/coco_create_base.py 8 | python tools/fewshot_exp/datasets/coco_create_standard.py 9 | mkdir fs_exp 10 | -------------------------------------------------------------------------------- /tools/fewshot_exp/datasets/voc_create_standard.py: -------------------------------------------------------------------------------- 1 | import os 2 | from maskrcnn_benchmark.data.datasets.voc import PascalVOCDataset 3 | cls = PascalVOCDataset.CLASSES[1:] 4 | yolodir = '../Fewshot_Detection' 5 | for shot in [10, 5, 3, 2, 1]: 6 | ids = [] 7 | for c in cls: 8 | with open(yolodir + '/data/vocsplit/box_%dshot_%s_train.txt'%(shot, c)) as f: 9 | content = f.readlines() 10 | content = [i.strip().split('/')[-1][:-4] for i in content] 11 | ids += content 12 | ids = list(set(ids)) 13 | with open('datasets/voc/VOC2007/ImageSets/Main/trainval_%dshot_novel_standard.txt'%shot, 'w+') as f: 14 | for i in ids: 15 | if '_' not in i: 16 | f.write(i + '\n') 17 | with open('datasets/voc/VOC2012/ImageSets/Main/trainval_%dshot_novel_standard.txt'%shot, 'w+') as f: 18 | for i in ids: 19 | if '_' in i: 20 | f.write(i + '\n') 21 | 22 | 
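voc_create_standard.py above relies on the VOC filename convention to split the collected image ids between years: VOC2012 ids contain an underscore (e.g. 2008_000002) while VOC2007 ids are plain numbers, which is why ids containing '_' are routed to the VOC2012 list and the rest to VOC2007. A small sanity check such as the sketch below (paths assumed to match the files the script writes; adjust shot as needed) can confirm that the generated lists are non-empty.

# Count how many image ids landed in each year's k-shot list.
# Paths are assumed to match the output of voc_create_standard.py above.
shot = 10
for year in ("2007", "2012"):
    path = "datasets/voc/VOC%s/ImageSets/Main/trainval_%dshot_novel_standard.txt" % (year, shot)
    with open(path) as f:
        ids = [line.strip() for line in f if line.strip()]
    print("VOC%s %dshot: %d images" % (year, shot, len(ids)))
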
-------------------------------------------------------------------------------- /tools/fewshot_exp/train_baseline_coco_base.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0,1 3 | export NGPUS=2 4 | configfile=configs/fewshot_baseline/base/e2e_coco_base.yaml 5 | python -m torch.distributed.launch --nproc_per_node=$NGPUS ./tools/train_net.py --config-file ${configfile} 6 | mv model_final.pth model_baseline_coco_base.pth 7 | mv ~/coco_result.txt fs_exp/result_coco_base.txt 8 | rm last_checkpoint 9 | python tools/fewshot_exp/trans_baseline_coco_pretrained.py 10 | -------------------------------------------------------------------------------- /tools/fewshot_exp/train_baseline_coco_standard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0,1 3 | export NGPUS=2 4 | SHOT=(10 30) 5 | mkdir fs_exp/coco_baseline_standard_results 6 | for shot in ${SHOT[*]} 7 | do 8 | configfile=configs/fewshot_baseline/standard/e2e_coco_${shot}shot_finetune.yaml 9 | python -m torch.distributed.launch --nproc_per_node=$NGPUS ./tools/train_net.py --config-file ${configfile} 10 | rm model_final.pth 11 | rm last_checkpoint 12 | mv ~/coco_result.txt fs_exp/coco_baseline_standard_results/result_${shot}shot.txt 13 | done 14 | python tools/fewshot_exp/cal_novel_coco.py fs_exp/coco_baseline_standard_results 15 | -------------------------------------------------------------------------------- /tools/fewshot_exp/train_baseline_voc_base.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0,1 3 | export NGPUS=2 4 | SPLIT=(1 2 3) 5 | for split in ${SPLIT[*]} 6 | do 7 | configfile=configs/fewshot_baseline/base/e2e_voc_split${split}_base.yaml 8 | python -m torch.distributed.launch --nproc_per_node=$NGPUS ./tools/train_net.py --config-file ${configfile} 9 | mv model_final.pth model_baseline_voc_split${split}_base.pth 10 | mv inference/voc_2007_test_split${split}_base/result.txt fs_exp/result_split${split}_base.txt 11 | rm last_checkpoint 12 | python tools/fewshot_exp/trans_baseline_voc_pretrained.py ${split} 13 | done 14 | -------------------------------------------------------------------------------- /tools/fewshot_exp/train_baseline_voc_standard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0,1 3 | export NGPUS=2 4 | SPLIT=(1 2 3) 5 | SHOT=(10 5 3 2 1) 6 | mkdir fs_exp/voc_baseline_standard_results 7 | for shot in ${SHOT[*]} 8 | do 9 | for split in ${SPLIT[*]} 10 | do 11 | configfile=configs/fewshot_baseline/standard/e2e_voc_split${split}_${shot}shot_finetune.yaml 12 | python -m torch.distributed.launch --nproc_per_node=$NGPUS ./tools/train_net.py --config-file ${configfile} 13 | rm model_final.pth 14 | rm last_checkpoint 15 | mv inference/voc_2007_test/result.txt fs_exp/voc_baseline_standard_results/result_split${split}_${shot}shot.txt 16 | done 17 | done 18 | python tools/fewshot_exp/cal_novel_voc.py fs_exp/voc_baseline_standard_results 19 | -------------------------------------------------------------------------------- /tools/fewshot_exp/train_coco_base.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0,1 3 | export NGPUS=2 4 | configfile=configs/fewshot/base/e2e_coco_base.yaml 5 | python 
tools/fewshot_exp/crops/create_crops_coco_base.py 6 | python -m torch.distributed.launch --nproc_per_node=$NGPUS ./tools/train_net.py --config-file ${configfile} 7 | mv model_final.pth model_coco_base.pth 8 | mv ~/coco_result.txt fs_exp/result_coco_base.txt 9 | rm last_checkpoint 10 | python tools/fewshot_exp/trans_coco_pretrained.py 11 | -------------------------------------------------------------------------------- /tools/fewshot_exp/train_coco_standard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0,1 3 | export NGPUS=2 4 | SHOT=(10 30) 5 | mkdir fs_exp/coco_standard_results 6 | for shot in ${SHOT[*]} 7 | do 8 | configfile=configs/fewshot/standard/e2e_coco_${shot}shot_finetune.yaml 9 | python tools/fewshot_exp/crops/create_crops_coco_standard.py ${shot} 10 | python -m torch.distributed.launch --nproc_per_node=$NGPUS ./tools/train_net.py --config-file ${configfile} 11 | rm model_final.pth 12 | rm last_checkpoint 13 | mv ~/coco_result.txt fs_exp/coco_standard_results/result_${shot}shot.txt 14 | done 15 | python tools/fewshot_exp/cal_novel_coco.py fs_exp/coco_standard_results 16 | -------------------------------------------------------------------------------- /tools/fewshot_exp/train_voc_base.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0,1 3 | export NGPUS=2 4 | SPLIT=(1 2 3) 5 | for split in ${SPLIT[*]} 6 | do 7 | configfile=configs/fewshot/base/e2e_voc_split${split}_base.yaml 8 | python tools/fewshot_exp/crops/create_crops_voc_base.py ${split} 9 | python -m torch.distributed.launch --nproc_per_node=$NGPUS ./tools/train_net.py --config-file ${configfile} 10 | mv model_final.pth model_voc_split${split}_base.pth 11 | mv inference/voc_2007_test_split${split}_base/result.txt fs_exp/result_split${split}_base.txt 12 | rm last_checkpoint 13 | python tools/fewshot_exp/trans_voc_pretrained.py ${split} 14 | done 15 | -------------------------------------------------------------------------------- /tools/fewshot_exp/train_voc_series.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Warning: this script was used for hyperparameter tuning. 3 | # Only the few-shot classes are limited to N shots (not the base classes), for more general sampling; this may lead to a drop in performance. 4 | # Hope this helps your further research. 
5 | export CUDA_VISIBLE_DEVICES=0,1 6 | export NGPUS=2 7 | SEED=(0 1 2 3 4 5) 8 | SPLIT=(1 2 3) 9 | SHOT=(10 5 3 2 1) 10 | for seed in ${SEED[*]} 11 | do 12 | mkdir fs_exp/voc_series_results_seed${seed} 13 | for shot in ${SHOT[*]} 14 | do 15 | for split in ${SPLIT[*]} 16 | do 17 | configfile=configs/fewshot/standard/e2e_voc_split${split}_${shot}shot_finetune.yaml 18 | python tools/fewshot_exp/datasets/voc_sample_series.py ${shot} ${split} ${seed} 19 | python tools/fewshot_exp/crops/create_crops_voc_standard.py ${shot} 20 | python -m torch.distributed.launch --nproc_per_node=$NGPUS ./tools/train_net.py --config-file ${configfile} 21 | rm model_final.pth 22 | rm last_checkpoint 23 | mv inference/voc_2007_test/result.txt fs_exp/voc_series_results_seed${seed}/result_split${split}_${shot}shot.txt 24 | done 25 | done 26 | done 27 | #python tools/fewshot_exp/cal_novel_voc.py 28 | -------------------------------------------------------------------------------- /tools/fewshot_exp/train_voc_standard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0,1 3 | export NGPUS=2 4 | SPLIT=(1 2 3) 5 | SHOT=(10 5 3 2 1) 6 | mkdir fs_exp/voc_standard_results 7 | for shot in ${SHOT[*]} 8 | do 9 | for split in ${SPLIT[*]} 10 | do 11 | configfile=configs/fewshot/standard/e2e_voc_split${split}_${shot}shot_finetune.yaml 12 | python tools/fewshot_exp/crops/create_crops_voc_standard.py ${shot} 13 | python -m torch.distributed.launch --nproc_per_node=$NGPUS ./tools/train_net.py --config-file ${configfile} 14 | rm model_final.pth 15 | rm last_checkpoint 16 | mv inference/voc_2007_test/result.txt fs_exp/voc_standard_results/result_split${split}_${shot}shot.txt 17 | done 18 | done 19 | python tools/fewshot_exp/cal_novel_voc.py fs_exp/voc_standard_results 20 | -------------------------------------------------------------------------------- /tools/fewshot_exp/trans_baseline_coco_pretrained.py: -------------------------------------------------------------------------------- 1 | import torch 2 | checkpoint = torch.load('model_baseline_coco_base.pth', map_location=torch.device("cpu")) 3 | model = checkpoint['model'] 4 | aimclass = 81 5 | change = [('module.roi_heads.box.predictor.cls_score.weight', (aimclass, 1024)), 6 | ('module.roi_heads.box.predictor.cls_score.bias' , aimclass)] 7 | t = torch.empty(change[0][1]) 8 | torch.nn.init.normal_(t, std=0.001) 9 | model[change[0][0]] = t 10 | t = torch.empty(change[1][1]) 11 | torch.nn.init.constant_(t, 0) 12 | model[change[1][0]] = t 13 | checkpoint = dict(model=model) 14 | torch.save(checkpoint, 'coco_baseline_base_pretrained.pth') 15 | -------------------------------------------------------------------------------- /tools/fewshot_exp/trans_baseline_voc_pretrained.py: -------------------------------------------------------------------------------- 1 | import torch 2 | #from torch.nn import functional as F 3 | import sys 4 | split=int(sys.argv[1]) 5 | checkpoint = torch.load('model_baseline_voc_split%d_base.pth'%split, map_location=torch.device("cpu")) 6 | model = checkpoint['model'] 7 | aimclass = 21 8 | change = [('module.roi_heads.box.predictor.cls_score.weight', (aimclass, 1024)), 9 | ('module.roi_heads.box.predictor.cls_score.bias' , aimclass)] 10 | t = torch.empty(change[0][1]) 11 | torch.nn.init.normal_(t, std=0.001) 12 | model[change[0][0]] = t 13 | t = torch.empty(change[1][1]) 14 | torch.nn.init.constant_(t, 0) 15 | model[change[1][0]] = t 16 | checkpoint = dict(model=model) 17 | 
torch.save(checkpoint, 'voc0712_baseline_split%dbase_pretrained.pth'%split) 18 | -------------------------------------------------------------------------------- /tools/fewshot_exp/trans_coco_pretrained.py: -------------------------------------------------------------------------------- 1 | import torch 2 | checkpoint = torch.load('model_coco_base.pth', map_location=torch.device("cpu")) 3 | model = checkpoint['model'] 4 | aimclass = 81 5 | change = [('module.roi_heads.box.predictor.cls_score.weight', (aimclass, 1024)), 6 | ('module.roi_heads.box.predictor.cls_score.bias' , aimclass)] 7 | t = torch.empty(change[0][1]) 8 | torch.nn.init.normal_(t, std=0.001) 9 | model[change[0][0]] = t 10 | t = torch.empty(change[1][1]) 11 | torch.nn.init.constant_(t, 0) 12 | model[change[1][0]] = t 13 | checkpoint = dict(model=model) 14 | torch.save(checkpoint, 'coco_base_pretrained.pth') 15 | -------------------------------------------------------------------------------- /tools/fewshot_exp/trans_voc_pretrained.py: -------------------------------------------------------------------------------- 1 | import torch 2 | #from torch.nn import functional as F 3 | import sys 4 | split=int(sys.argv[1]) 5 | checkpoint = torch.load('model_voc_split%d_base.pth'%split, map_location=torch.device("cpu")) 6 | model = checkpoint['model'] 7 | aimclass = 21 8 | change = [('module.roi_heads.box.predictor.cls_score.weight', (aimclass, 1024)), 9 | ('module.roi_heads.box.predictor.cls_score.bias' , aimclass)] 10 | t = torch.empty(change[0][1]) 11 | torch.nn.init.normal_(t, std=0.001) 12 | model[change[0][0]] = t 13 | t = torch.empty(change[1][1]) 14 | torch.nn.init.constant_(t, 0) 15 | model[change[1][0]] = t 16 | checkpoint = dict(model=model) 17 | torch.save(checkpoint, 'voc0712_split%dbase_pretrained.pth'%split) 18 | --------------------------------------------------------------------------------
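All four trans_*_pretrained.py scripts follow the same pattern: load the base-training checkpoint, re-initialize only the box classifier (cls_score) to the full class count (21 for VOC, 81 for COCO), keep every other weight unchanged, and save the result as the starting point for few-shot finetuning. Before pointing a finetune config at a converted checkpoint, a quick shape check along the lines of the sketch below can catch a wrong class count; the filename is assumed to be the one produced by trans_voc_pretrained.py for split 1.

import torch

# Verify that the re-initialized classification head has the expected shape.
# Filename assumed from trans_voc_pretrained.py above (split 1).
ckpt = torch.load("voc0712_split1base_pretrained.pth", map_location="cpu")
w = ckpt["model"]["module.roi_heads.box.predictor.cls_score.weight"]
b = ckpt["model"]["module.roi_heads.box.predictor.cls_score.bias"]
assert w.shape == (21, 1024) and b.shape == (21,)
print("cls_score re-initialized for %d classes" % w.shape[0])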