├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── NOTICE.txt ├── README.md ├── SECURITY.md ├── classification ├── PACS │ ├── main_agg.py │ ├── main_agg_SNR_causality.py │ ├── model_resnet.py │ ├── resnet_SNR.py │ ├── resnet_vanilla.py │ ├── run_main_agg.sh │ └── run_main_agg_snr_causality.sh ├── README.md ├── common │ ├── data_reader.py │ └── utils.py └── requirements.txt ├── detection ├── INSTALL.md ├── README.md ├── configs │ ├── caffe2 │ │ ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml │ │ ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml │ │ ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml │ │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml │ │ ├── e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml │ │ ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml │ │ ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml │ │ ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml │ │ ├── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml │ │ └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml │ ├── cityscapes │ │ ├── README.md │ │ ├── e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml │ │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml │ ├── da_faster_rcnn │ │ ├── e2e_da_faster_rcnn_R_50_C4_SNR_cityscapes_to_foggy_cityscapes.yaml │ │ ├── e2e_da_faster_rcnn_R_50_C4_SNR_cityscapes_to_foggy_cityscapes_source_only.yaml │ │ ├── e2e_da_faster_rcnn_R_50_C4_SNR_cityscapes_to_kitti.yaml │ │ ├── e2e_da_faster_rcnn_R_50_C4_SNR_cityscapes_to_kitti_source_only.yaml │ │ ├── e2e_da_faster_rcnn_R_50_C4_SNR_kitti_to_cityscapes.yaml │ │ ├── e2e_da_faster_rcnn_R_50_C4_SNR_kitti_to_cityscapes_source_only.yaml │ │ ├── e2e_da_faster_rcnn_R_50_C4_cityscapes_to_foggy_cityscapes.yaml │ │ ├── e2e_da_faster_rcnn_R_50_C4_cityscapes_to_foggy_cityscapes_source_only.yaml │ │ ├── e2e_da_faster_rcnn_R_50_C4_cityscapes_to_kitti.yaml │ │ ├── e2e_da_faster_rcnn_R_50_C4_cityscapes_to_kitti_source_only.yaml │ │ ├── e2e_da_faster_rcnn_R_50_C4_kitti_to_cityscapes.yaml │ │ ├── e2e_da_faster_rcnn_R_50_C4_kitti_to_cityscapes_source_only.yaml │ │ └── 
e2e_da_faster_rcnn_R_50_C4_sim10k.yaml │ ├── dcn │ │ ├── README.md │ │ ├── e2e_faster_rcnn_dconv_R_50_FPN_1x.yaml │ │ ├── e2e_faster_rcnn_mdconv_R_50_FPN_1x.yaml │ │ ├── e2e_mask_rcnn_dconv_R_50_FPN_1x.yaml │ │ └── e2e_mask_rcnn_mdconv_R_50_FPN_1x.yaml │ ├── e2e_faster_rcnn_R_101_FPN_1x.yaml │ ├── e2e_faster_rcnn_R_50_C4_1x.yaml │ ├── e2e_faster_rcnn_R_50_FPN_1x.yaml │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml │ ├── e2e_faster_rcnn_fbnet.yaml │ ├── e2e_faster_rcnn_fbnet_600.yaml │ ├── e2e_faster_rcnn_fbnet_chamv1a_600.yaml │ ├── e2e_keypoint_rcnn_R_50_FPN_1x.yaml │ ├── e2e_mask_rcnn_R_101_FPN_1x.yaml │ ├── e2e_mask_rcnn_R_50_C4_1x.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x_periodically_testing.yaml │ ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml │ ├── e2e_mask_rcnn_fbnet.yaml │ ├── e2e_mask_rcnn_fbnet_600.yaml │ ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml │ ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml │ ├── gn_baselines │ │ ├── README.md │ │ ├── e2e_faster_rcnn_R_50_FPN_1x_gn.yaml │ │ ├── e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ │ ├── e2e_mask_rcnn_R_50_FPN_1x_gn.yaml │ │ ├── e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ │ ├── scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml │ │ ├── scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml │ │ ├── scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml │ │ └── scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml │ ├── pascal_voc │ │ ├── e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml │ │ ├── e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml │ │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml │ ├── quick_schedules │ │ ├── e2e_faster_rcnn_R_50_C4_quick.yaml │ │ ├── e2e_faster_rcnn_R_50_FPN_quick.yaml │ │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml │ │ ├── e2e_keypoint_rcnn_R_50_FPN_quick.yaml │ │ ├── e2e_mask_rcnn_R_50_C4_quick.yaml │ │ ├── e2e_mask_rcnn_R_50_FPN_quick.yaml │ │ ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml │ │ ├── rpn_R_50_C4_quick.yaml │ │ └── rpn_R_50_FPN_quick.yaml │ ├── retinanet │ │ ├── 
retinanet_R-101-FPN_1x.yaml │ │ ├── retinanet_R-101-FPN_P5_1x.yaml │ │ ├── retinanet_R-50-FPN_1x.yaml │ │ ├── retinanet_R-50-FPN_1x_quick.yaml │ │ ├── retinanet_R-50-FPN_P5_1x.yaml │ │ └── retinanet_X_101_32x8d_FPN_1x.yaml │ ├── rpn_R_101_FPN_1x.yaml │ ├── rpn_R_50_C4_1x.yaml │ ├── rpn_R_50_FPN_1x.yaml │ ├── rpn_X_101_32x8d_FPN_1x.yaml │ └── test_time_aug │ │ └── e2e_mask_rcnn_R_50_FPN_1x.yaml ├── demo │ ├── Mask_R-CNN_demo.ipynb │ ├── README.md │ ├── demo_e2e_mask_rcnn_R_50_FPN_1x.png │ ├── demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png │ ├── panoptic_segmentation_shapes_dataset_demo.ipynb │ ├── predictor.py │ ├── shapes_dataset_demo.ipynb │ ├── shapes_pruning.ipynb │ └── webcam.py ├── docker │ ├── Dockerfile │ └── docker-jupyter │ │ ├── Dockerfile │ │ └── jupyter_notebook_config.py ├── logs │ └── city2foggy_r50_consistency_log.txt ├── maskrcnn_benchmark │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ ├── defaults.py │ │ └── paths_catalog.py │ ├── csrc │ │ ├── ROIAlign.h │ │ ├── ROIPool.h │ │ ├── SigmoidFocalLoss.h │ │ ├── cpu │ │ │ ├── ROIAlign_cpu.cpp │ │ │ ├── nms_cpu.cpp │ │ │ └── vision.h │ │ ├── cuda │ │ │ ├── ROIAlign_cuda.cu │ │ │ ├── ROIPool_cuda.cu │ │ │ ├── SigmoidFocalLoss_cuda.cu │ │ │ ├── deform_conv_cuda.cu │ │ │ ├── deform_conv_kernel_cuda.cu │ │ │ ├── deform_pool_cuda.cu │ │ │ ├── deform_pool_kernel_cuda.cu │ │ │ ├── nms.cu │ │ │ └── vision.h │ │ ├── deform_conv.h │ │ ├── deform_pool.h │ │ ├── nms.h │ │ └── vision.cpp │ ├── data │ │ ├── README.md │ │ ├── __init__.py │ │ ├── build.py │ │ ├── collate_batch.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── abstract.py │ │ │ ├── coco.py │ │ │ ├── concat_dataset.py │ │ │ ├── evaluation │ │ │ │ ├── __init__.py │ │ │ │ ├── coco │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── coco_eval.py │ │ │ │ └── voc │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── voc_eval.py │ │ │ ├── list_dataset.py │ │ │ └── voc.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── distributed.py │ │ │ ├── grouped_batch_sampler.py │ │ │ └── 
iteration_based_batch_sampler.py │ │ └── transforms │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ └── transforms.py │ ├── engine │ │ ├── __init__.py │ │ ├── bbox_aug.py │ │ ├── inference.py │ │ └── trainer.py │ ├── layers │ │ ├── __init__.py │ │ ├── _utils.py │ │ ├── batch_norm.py │ │ ├── consistency_loss.py │ │ ├── dcn │ │ │ ├── __init__.py │ │ │ ├── deform_conv_func.py │ │ │ ├── deform_conv_module.py │ │ │ ├── deform_pool_func.py │ │ │ └── deform_pool_module.py │ │ ├── gradient_scalar_layer.py │ │ ├── misc.py │ │ ├── nms.py │ │ ├── roi_align.py │ │ ├── roi_pool.py │ │ ├── sigmoid_focal_loss.py │ │ └── smooth_l1_loss.py │ ├── modeling │ │ ├── backbone │ │ │ ├── __init__.py │ │ │ ├── backbone.py │ │ │ ├── fbnet.py │ │ │ ├── fbnet_builder.py │ │ │ ├── fbnet_modeldef.py │ │ │ ├── fpn.py │ │ │ └── resnet.py │ │ ├── balanced_positive_negative_sampler.py │ │ ├── box_coder.py │ │ ├── da_heads │ │ │ ├── da_heads.py │ │ │ └── loss.py │ │ ├── detector │ │ │ ├── __init__.py │ │ │ ├── detectors.py │ │ │ └── generalized_rcnn.py │ │ ├── make_layers.py │ │ ├── matcher.py │ │ ├── poolers.py │ │ ├── registry.py │ │ └── roi_heads │ │ │ └── box_head │ │ │ ├── box_head.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── roi_box_feature_extractors.py │ │ │ └── roi_box_predictors.py │ ├── solver │ │ ├── __init__.py │ │ ├── build.py │ │ └── lr_scheduler.py │ ├── structures │ │ ├── bounding_box.py │ │ ├── boxlist_ops.py │ │ ├── image_list.py │ │ ├── keypoint.py │ │ └── segmentation_mask.py │ └── utils │ │ ├── README.md │ │ ├── c2_model_loading.py │ │ ├── checkpoint.py │ │ ├── collect_env.py │ │ ├── comm.py │ │ ├── cv2_util.py │ │ ├── env.py │ │ ├── imports.py │ │ ├── logger.py │ │ ├── metric_logger.py │ │ ├── miscellaneous.py │ │ ├── model_serialization.py │ │ ├── model_zoo.py │ │ ├── registry.py │ │ └── timer.py ├── requirements.txt ├── setup.py ├── tests │ ├── checkpoint.py │ ├── env_tests │ │ └── env.py │ ├── test_backbones.py │ ├── test_box_coder.py │ ├── test_configs.py │ ├── 
test_data_samplers.py │ ├── test_detectors.py │ ├── test_fbnet.py │ ├── test_feature_extractors.py │ ├── test_metric_logger.py │ ├── test_nms.py │ ├── test_predictors.py │ ├── test_rpn_heads.py │ ├── test_segmentation_mask.py │ └── utils.py └── tools │ ├── cityscapes │ ├── convert_cityscapes_to_caronly_coco.py │ ├── convert_cityscapes_to_coco.py │ ├── convert_foggy_cityscapes_to_coco.py │ └── instances2dict_with_polygons.py │ ├── kitti │ └── convert_kitti_to_coco.py │ ├── test_net.py │ ├── test_net_batch.py │ └── train_net.py ├── imgs └── pipeline.png └── segmentation ├── README.md ├── datasets ├── NTHU_list │ ├── Rio │ │ └── List │ │ │ ├── test.txt │ │ │ └── train.txt │ ├── Rome │ │ └── List │ │ │ ├── test.txt │ │ │ └── train.txt │ ├── Taipei │ │ └── List │ │ │ ├── test.txt │ │ │ └── train.txt │ └── Tokyo │ │ └── List │ │ ├── test.txt │ │ └── train.txt ├── __init__.py ├── city_list │ ├── test.txt │ ├── train.txt │ ├── trainval.txt │ └── val.txt ├── cityscapes_Dataset.py ├── crosscity_Dataset.py ├── gta5_Dataset.py ├── gta5_list │ ├── all.txt │ ├── test.txt │ ├── train.txt │ ├── trainval.txt │ └── val.txt ├── synthia_Dataset.py └── synthia_list │ ├── train.txt │ └── val.txt ├── graphs ├── __init__.py └── models │ ├── ResNet101.py │ ├── __init__.py │ └── deeplab_multi.py ├── requirements.txt ├── tools ├── evaluate.py ├── solve_crosscity.py ├── solve_gta5.py └── train_source.py └── utils ├── __init__.py ├── eval.py ├── loss.py └── train_helper.py /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. 
Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 
# File: /classification/PACS/main_agg.py
"""Command-line entry point: parse training flags and run ``ModelAggregate``."""
import argparse

from model_resnet import ModelAggregate


def _str2bool(value):
    """Convert a command-line token to a real boolean.

    ``argparse`` with ``type=bool`` simply calls ``bool()`` on the raw string,
    so every non-empty value -- including ``"False"`` and ``"0"`` -- ends up
    as ``True``. This converter interprets the text instead.

    Args:
        value: The raw option value (a string from the command line, or an
            actual ``bool`` when called programmatically).

    Returns:
        ``True`` or ``False`` according to the spelled-out value. An empty
        string maps to ``False``, matching what ``bool("")`` returned before.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value))


def main():
    """Parse the training flags, build a ``ModelAggregate`` and train it.

    The parsed ``argparse.Namespace`` is handed unchanged to both the
    ``ModelAggregate`` constructor and its ``train`` method as ``flags``.
    """
    train_arg_parser = argparse.ArgumentParser(description="parser")

    # Integer-valued options.
    train_arg_parser.add_argument("--seed", type=int, default=1, help="")
    train_arg_parser.add_argument("--test_every", type=int, default=50, help="")
    train_arg_parser.add_argument("--batch_size", type=int, default=128, help="")
    train_arg_parser.add_argument("--num_classes", type=int, default=10, help="")
    train_arg_parser.add_argument("--step_size", type=int, default=1, help="")
    train_arg_parser.add_argument("--bn_eval", type=int, default=1, help="")
    train_arg_parser.add_argument("--loops_train", type=int, default=200000, help="")
    train_arg_parser.add_argument("--unseen_index", type=int, default=0, help="")

    # Float-valued optimiser options.
    train_arg_parser.add_argument("--lr", type=float, default=0.0001, help='')
    train_arg_parser.add_argument("--weight_decay", type=float, default=0.00005, help='')
    train_arg_parser.add_argument("--momentum", type=float, default=0.9, help='')

    # String-valued path options.
    train_arg_parser.add_argument("--logs", type=str, default='logs/', help='')
    train_arg_parser.add_argument("--model_path", type=str, default='', help='')
    train_arg_parser.add_argument("--state_dict", type=str, default='', help='')
    train_arg_parser.add_argument("--data_root", type=str, default='', help='')

    # Parsed with _str2bool so that "--deterministic=False" really disables
    # the flag; type=bool would have turned any non-empty string into True.
    train_arg_parser.add_argument("--deterministic", type=_str2bool, default=False, help='')

    args = train_arg_parser.parse_args()

    model_obj = ModelAggregate(flags=args)
    model_obj.train(flags=args)


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /classification/PACS/main_agg_SNR_causality.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 3 | import argparse 4 | 5 | from model_resnet import ModelAggregate_SNR_CausalityLoss 6 | 7 | 8 | def main(): 9 | train_arg_parser = argparse.ArgumentParser(description="parser") 10 | train_arg_parser.add_argument("--seed", type=int, default=1, 11 | help="") 12 | train_arg_parser.add_argument("--test_every", type=int, default=50, 13 | help="") 14 | train_arg_parser.add_argument("--batch_size", type=int, default=128, 15 | help="") 16 | train_arg_parser.add_argument("--num_classes", type=int, default=10, 17 | help="") 18 | train_arg_parser.add_argument("--step_size", type=int, default=1, 19 | help="") 20 | train_arg_parser.add_argument("--bn_eval", type=int, default=1, 21 | help="") 22 | train_arg_parser.add_argument("--loops_train", type=int, default=200000, 23 | help="") 24 | train_arg_parser.add_argument("--unseen_index", type=int, default=0, 25 | help="") 26 | train_arg_parser.add_argument("--lr", type=float, default=0.0001, 27 | help='') 28 | train_arg_parser.add_argument("--weight_decay", type=float, default=0.00005, 29 | help='') 30 | train_arg_parser.add_argument("--momentum", type=float, default=0.9, 31 | help='') 32 | train_arg_parser.add_argument("--logs", type=str, default='logs/', 33 | help='') 34 | train_arg_parser.add_argument("--model_path", type=str, default='', 35 | help='') 36 | train_arg_parser.add_argument("--state_dict", type=str, default='', 37 | help='') 38 | train_arg_parser.add_argument("--data_root", type=str, default='', 39 | help='') 40 | train_arg_parser.add_argument("--deterministic", type=bool, default=False, 41 | help='') 42 | args = train_arg_parser.parse_args() 43 | 44 | model_obj = ModelAggregate_SNR_CausalityLoss(flags=args) 
45 | model_obj.train(flags=args) 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /classification/PACS/run_main_agg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | max=3 5 | lr=0.001 6 | lps=3001 7 | det=True 8 | 9 | for i in `seq 0 $max` 10 | do 11 | python main_agg.py \ 12 | --lr=$lr \ 13 | --num_classes=7 \ 14 | --test_every=100 \ 15 | --logs='agg/logs_'$i'/' \ 16 | --batch_size=32 \ 17 | --model_path='agg/models_'$i'/' \ 18 | --unseen_index=$i \ 19 | --loops_train=$lps \ 20 | --step_size=$lps \ 21 | --state_dict=$2 \ 22 | --data_root=$1 \ 23 | --weight_decay=1e-4 \ 24 | --momentum=0.9 \ 25 | --deterministic=$det 26 | done 27 | -------------------------------------------------------------------------------- /classification/PACS/run_main_agg_snr_causality.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
3 | #!/usr/bin/env bash 4 | 5 | 6 | max=3 7 | lr=0.001 8 | lps=3001 9 | det=True 10 | 11 | for i in `seq 0 $max` 12 | do 13 | python main_agg_SNR_causality.py \ 14 | --lr=$lr \ 15 | --num_classes=7 \ 16 | --test_every=100 \ 17 | --logs='agg_SNR_causality/logs_'$i'/' \ 18 | --batch_size=32 \ 19 | --model_path='agg_SNR_causality/models_'$i'/' \ 20 | --unseen_index=$i \ 21 | --loops_train=$lps \ 22 | --step_size=$lps \ 23 | --state_dict=$2 \ 24 | --data_root=$1 \ 25 | --weight_decay=1e-4 \ 26 | --momentum=0.9 \ 27 | --deterministic=$det 28 | done 29 | -------------------------------------------------------------------------------- /classification/README.md: -------------------------------------------------------------------------------- 1 | # SNR-DG 2 | This is the repo for reproducing the domain generalization results on the classification task in the paper 'Style Normalization and Restitution for Domain Generalization and Adaptation'. We use Epi-FCR (https://github.com/HAHA-DL/Episodic-DG) as our code framework to validate the effectiveness of SNR on the PACS dataset. 3 | ## Datasets preparation 4 | Please download the data from https://drive.google.com/open?id=0B6x7gtvErXgfUU1WcGY5SzdwZVk and use the official train/val split. 5 | ### ImageNet pretrained model 6 | We use the pytorch pretrained ResNet-18 model from https://download.pytorch.org/models/resnet18-5c106cde.pth 7 | 8 | ## Environments 9 | 10 | > pytorch 1.0.0 \ 11 | > Python 3.7.3 \ 12 | > Ubuntu 16.04.6 13 | 14 | ## Run 15 | 16 | > Baseline: 17 | 18 | sh run_main_agg.sh #data_folder #model_path 19 | 20 | > Our SNR with the proposed dual causality loss: 21 | 22 | sh run_main_agg_snr_causality.sh #data_folder #model_path 23 | 24 | 25 | ### Correspondence with the paper 26 | 27 | Please refer to Section 4.1 of our paper.
28 | 29 | 30 | ### Reference 31 | If you find the paper useful or consider using the code, please cite the paper: 32 | 33 | ``` 34 | @article{jin2021style, 35 | title={Style Normalization and Restitution for Domain Generalization and Adaptation}, 36 | author={Jin, Xin and Lan, Cuiling and Zeng, Wenjun and Chen, Zhibo}, 37 | journal={arXiv preprint arXiv:2101.00588}, 38 | year={2021} 39 | } 40 | ``` 41 | 42 | ### Note 43 | 44 | When working with a different environment, you may get different results and need to tune the hyperparameters. 45 | 46 | -------------------------------------------------------------------------------- /classification/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.0.0 2 | numpy==1.16.4 3 | scipy==1.2.1 -------------------------------------------------------------------------------- /detection/INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements: 4 | - PyTorch 1.0 from a nightly release. Installation instructions can be found in https://pytorch.org/get-started/locally/ 5 | - torchvision from master 6 | - cocoapi 7 | - yacs 8 | - matplotlib 9 | - GCC >= 4.9 10 | - (optional) OpenCV for the webcam demo 11 | 12 | 13 | ### Option 1: Step-by-step installation 14 | 15 | ```bash 16 | # first, make sure that your conda is set up properly with the right environment 17 | # for that, check that `which conda`, `which pip` and `which python` point to the 18 | # right path.
From a clean conda env, this is what you need to do 19 | 20 | conda create --name maskrcnn_benchmark 21 | source activate maskrcnn_benchmark 22 | 23 | # this installs the right pip and dependencies for the fresh python 24 | conda install ipython 25 | 26 | # maskrcnn_benchmark and coco api dependencies 27 | pip install ninja yacs cython matplotlib 28 | 29 | # follow PyTorch installation in https://pytorch.org/get-started/locally/ 30 | # we give the instructions for CUDA 9.0 31 | conda install pytorch-nightly -c pytorch 32 | 33 | # install torchvision 34 | cd ~/github 35 | git clone https://github.com/pytorch/vision.git 36 | cd vision 37 | python setup.py install 38 | 39 | # install pycocotools 40 | cd ~/github 41 | git clone https://github.com/cocodataset/cocoapi.git 42 | cd cocoapi/PythonAPI 43 | python setup.py build_ext install 44 | 45 | # install PyTorch Detection 46 | cd ~/github 47 | git clone https://github.com/facebookresearch/maskrcnn-benchmark.git 48 | cd maskrcnn-benchmark 49 | # the following will install the lib with 50 | # symbolic links, so that you can modify 51 | # the files if you want and won't need to 52 | # re-build it 53 | python setup.py build develop 54 | 55 | # or if you are on macOS 56 | # MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop 57 | ``` 58 | 59 | ### Option 2: Docker Image (Requires CUDA, Linux only) 60 | 61 | Build image with defaults (`CUDA=9.0`, `CUDNN=7`): 62 | 63 | nvidia-docker build -t maskrcnn-benchmark docker/ 64 | 65 | Build image with other CUDA and CUDNN versions: 66 | 67 | nvidia-docker build -t maskrcnn-benchmark --build-arg CUDA=9.2 --build-arg CUDNN=7 docker/ 68 | 69 | Build and run image with built-in jupyter notebook(note that the password is used to log in jupyter notebook): 70 | 71 | nvidia-docker build -t maskrcnn-benchmark-jupyter docker/docker-jupyter/ 72 | nvidia-docker run -td -p 8888:8888 -e PASSWORD= -v : maskrcnn-benchmark-jupyter 
-------------------------------------------------------------------------------- /detection/README.md: -------------------------------------------------------------------------------- 1 | # SNR-Object detection 2 | 3 | This is the repo for reproducing the results for domain generalization and unsupervised domain adaptation, respectively, on the object detection task in the paper 'Style Normalization and Restitution for Domain Generalization and Adaptation'. We use the codebase from 'Domain Adaptive Faster R-CNN for Object Detection in the Wild' (https://github.com/krumo/Domain-Adaptive-Faster-RCNN-PyTorch/blob/master/LICENSE_maskrcnn_benchmark). 4 | 5 | 6 | ## Installation 7 | 8 | Please follow the instructions in [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark) to install and use Domain-Adaptive-Faster-RCNN-PyTorch. 9 | 10 | ## Datasets 11 | 12 | Please refer to the semantic segmentation sub-section for more details. 13 | 14 | 15 | ## Example Usage 16 | Domain Generalization: If you pretrain the model only on the source domain, this corresponds to our SNR scheme in the domain generalization setting, where you can change the backbone from DA Faster R-CNN to the naive Faster R-CNN. 17 | 18 | Unsupervised Domain Adaptation: (Note) Please refer to the scheme of 'SNR-DA Faster R-CNN (ours)' in Section 4.3 of our paper. 19 | An example of Domain Adaptive Faster R-CNN with FPN adapting from the **Cityscapes** dataset to the **Foggy Cityscapes** dataset is provided: 20 | 1. Follow the example in [Detectron-DA-Faster-RCNN](https://github.com/krumo/Detectron-DA-Faster-RCNN) to download the datasets and generate COCO-style annotation files 21 | 2. Symlink the paths to the Cityscapes and Foggy Cityscapes datasets to `datasets/` as follows: 22 | ```bash 23 | # symlink the dataset 24 | cd ~/github/Domain-Adaptive-Faster-RCNN-PyTorch 25 | ln -s /path/to/cityscapes_dataset/ datasets/cityscapes 26 | ln -s /path/to/foggy_cityscapes_dataset/ datasets/foggy_cityscapes 27 | ``` 28 | 3.
Train the Domain Adaptive Faster R-CNN with the SNR module integrated: 29 | ``` 30 | python tools/train_net.py --config-file "configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_SNR_cityscapes_to_foggy_cityscapes.yaml" 31 | ``` 32 | 4. Test the trained model: 33 | ``` 34 | python tools/test_net.py --config-file "configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_SNR_cityscapes_to_foggy_cityscapes.yaml" MODEL.WEIGHT /path/to/output_dir/model_final.pth 35 | ``` 36 | ### Citation 37 | 38 | If you find the paper useful or consider using the code, please cite the paper: 39 | 40 | ``` 41 | @article{jin2021style, 42 | title={Style Normalization and Restitution for Domain Generalization and Adaptation}, 43 | author={Jin, Xin and Lan, Cuiling and Zeng, Wenjun and Chen, Zhibo}, 44 | journal={arXiv preprint arXiv:2101.00588}, 45 | year={2021} 46 | } 47 | ``` 48 | -------------------------------------------------------------------------------- /detection/configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 23 | TEST: ("coco_2014_minival",) 24 | DATALOADER: 25 | SIZE_DIVISIBILITY: 32 26 | -------------------------------------------------------------------------------- /detection/configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml:
-------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x" 4 | DATASETS: 5 | TEST: ("coco_2014_minival",) 6 | -------------------------------------------------------------------------------- /detection/configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 23 | TEST: ("coco_2014_minival",) 24 | DATALOADER: 25 | SIZE_DIVISIBILITY: 32 26 | -------------------------------------------------------------------------------- /detection/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | 
POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | RESNETS: 23 | STRIDE_IN_1X1: False 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | DATASETS: 27 | TEST: ("coco_2014_minival",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | -------------------------------------------------------------------------------- /detection/configs/caffe2/e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/37697547/e2e_keypoint_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | NUM_CLASSES: 2 23 | ROI_KEYPOINT_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor" 26 | PREDICTOR: "KeypointRCNNPredictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 56 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | KEYPOINT_ON: True 32 | DATASETS: 33 | TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",) 34 | TEST: ("keypoints_coco_2014_minival",) 35 | INPUT: 36 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | SOLVER: 40 | BASE_LR: 0.02 41 | WEIGHT_DECAY: 0.0001 42 | STEPS: (60000, 80000) 43 | MAX_ITER: 90000 44 | -------------------------------------------------------------------------------- /detection/configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TEST: ("coco_2014_minival",) 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | -------------------------------------------------------------------------------- /detection/configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x" 4 | ROI_MASK_HEAD: 5 | PREDICTOR: "MaskRCNNC4Predictor" 6 | SHARE_BOX_FEATURE_EXTRACTOR: True 7 | MASK_ON: True 8 | DATASETS: 9 | TEST: ("coco_2014_minival",) 10 | -------------------------------------------------------------------------------- /detection/configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x" 4 | 
BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TEST: ("coco_2014_minival",) 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | -------------------------------------------------------------------------------- /detection/configs/caffe2/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/37129812/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x" 4 | BACKBONE: 5 | CONV_BODY: "R-152-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | 
RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | RESNETS: 31 | STRIDE_IN_1X1: False 32 | NUM_GROUPS: 32 33 | WIDTH_PER_GROUP: 8 34 | MASK_ON: True 35 | DATASETS: 36 | TEST: ("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /detection/configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | RESNETS: 31 | STRIDE_IN_1X1: False 32 | NUM_GROUPS: 32 33 | WIDTH_PER_GROUP: 8 34 | MASK_ON: True 35 | DATASETS: 36 | TEST: ("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /detection/configs/cityscapes/e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | 
OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | NUM_CLASSES: 9 23 | DATASETS: 24 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 25 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",) 26 | DATALOADER: 27 | SIZE_DIVISIBILITY: 32 28 | SOLVER: 29 | BASE_LR: 0.01 30 | WEIGHT_DECAY: 0.0001 31 | STEPS: (18000,) 32 | MAX_ITER: 24000 33 | -------------------------------------------------------------------------------- /detection/configs/cityscapes/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | NUM_CLASSES: 9 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 
34 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (18000,) 41 | MAX_ITER: 24000 42 | -------------------------------------------------------------------------------- /detection/configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_SNR_cityscapes_to_foggy_cityscapes.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DOMAIN_ADAPTATION_ON: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-C4-SNR" 7 | RPN: 8 | PRE_NMS_TOP_N_TEST: 6000 9 | POST_NMS_TOP_N_TEST: 1000 10 | ROI_BOX_HEAD: 11 | NUM_CLASSES: 9 12 | ROI_HEADS: 13 | BATCH_SIZE_PER_IMAGE: 256 14 | NMS: 0.3 15 | DA_HEADS: 16 | DA_IMG_GRL_WEIGHT: 0.1 17 | DA_INS_GRL_WEIGHT: 0.1 18 | DATALOADER: 19 | SIZE_DIVISIBILITY: 32 20 | DATASETS: 21 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 22 | SOURCE_TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 23 | TARGET_TRAIN: ("foggy_cityscapes_fine_instanceonly_seg_train_cocostyle",) 24 | TEST: ("foggy_cityscapes_fine_instanceonly_seg_val_cocostyle",) 25 | INPUT: 26 | MIN_SIZE_TRAIN: (600,) 27 | MAX_SIZE_TRAIN: 1200 28 | MIN_SIZE_TEST: 600 29 | MAX_SIZE_TEST: 1200 30 | SOLVER: 31 | BASE_LR: 0.001 32 | WEIGHT_DECAY: 0.0005 33 | STEPS: (50000,) 34 | MAX_ITER: 70000 35 | IMS_PER_BATCH: 2 36 | TEST: 37 | IMS_PER_BATCH: 1 38 | -------------------------------------------------------------------------------- /detection/configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_SNR_cityscapes_to_foggy_cityscapes_source_only.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DOMAIN_ADAPTATION_ON: False 5 | BACKBONE: 6 | CONV_BODY: "R-50-C4-SNR" 7 | RPN: 8 | 
PRE_NMS_TOP_N_TEST: 6000 9 | POST_NMS_TOP_N_TEST: 1000 10 | ROI_BOX_HEAD: 11 | NUM_CLASSES: 9 12 | ROI_HEADS: 13 | BATCH_SIZE_PER_IMAGE: 256 14 | NMS: 0.3 15 | DA_HEADS: 16 | DA_IMG_GRL_WEIGHT: 0.1 17 | DA_INS_GRL_WEIGHT: 0.1 18 | DATALOADER: 19 | SIZE_DIVISIBILITY: 32 20 | DATASETS: 21 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 22 | SOURCE_TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 23 | TARGET_TRAIN: ("foggy_cityscapes_fine_instanceonly_seg_train_cocostyle",) 24 | TEST: ("foggy_cityscapes_fine_instanceonly_seg_val_cocostyle",) 25 | INPUT: 26 | MIN_SIZE_TRAIN: (600,) 27 | MAX_SIZE_TRAIN: 1200 28 | MIN_SIZE_TEST: 600 29 | MAX_SIZE_TEST: 1200 30 | SOLVER: 31 | BASE_LR: 0.001 32 | WEIGHT_DECAY: 0.0005 33 | STEPS: (50000,) 34 | MAX_ITER: 70000 35 | IMS_PER_BATCH: 2 36 | TEST: 37 | IMS_PER_BATCH: 1 38 | -------------------------------------------------------------------------------- /detection/configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_SNR_cityscapes_to_kitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DOMAIN_ADAPTATION_ON: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-C4-SNR" 7 | RPN: 8 | PRE_NMS_TOP_N_TEST: 6000 9 | POST_NMS_TOP_N_TEST: 1000 10 | ROI_BOX_HEAD: 11 | NUM_CLASSES: 2 12 | ROI_HEADS: 13 | BATCH_SIZE_PER_IMAGE: 256 14 | NMS: 0.3 15 | DA_HEADS: 16 | DA_IMG_GRL_WEIGHT: 0.1 17 | DA_INS_GRL_WEIGHT: 0.1 18 | DATALOADER: 19 | SIZE_DIVISIBILITY: 32 20 | DATASETS: 21 | TRAIN: ("cityscapes_car_train_cocostyle",) 22 | SOURCE_TRAIN: ("cityscapes_car_train_cocostyle",) 23 | TARGET_TRAIN: ("kitti_cocostyle",) 24 | TEST: ("kitti_cocostyle",) 25 | INPUT: 26 | MIN_SIZE_TRAIN: (600,) 27 | MAX_SIZE_TRAIN: 1200 28 | MIN_SIZE_TEST: 600 29 | MAX_SIZE_TEST: 1200 30 | SOLVER: 31 | BASE_LR: 0.001 32 | WEIGHT_DECAY: 0.0005 33 | STEPS: (50000,) 34 | MAX_ITER: 70000 35 | IMS_PER_BATCH: 2 36 | 
TEST: 37 | IMS_PER_BATCH: 1 38 | -------------------------------------------------------------------------------- /detection/configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_SNR_cityscapes_to_kitti_source_only.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DOMAIN_ADAPTATION_ON: False 5 | BACKBONE: 6 | CONV_BODY: "R-50-C4-SNR" 7 | RPN: 8 | PRE_NMS_TOP_N_TEST: 6000 9 | POST_NMS_TOP_N_TEST: 1000 10 | ROI_BOX_HEAD: 11 | NUM_CLASSES: 2 12 | ROI_HEADS: 13 | BATCH_SIZE_PER_IMAGE: 256 14 | NMS: 0.3 15 | DA_HEADS: 16 | DA_IMG_GRL_WEIGHT: 0.1 17 | DA_INS_GRL_WEIGHT: 0.1 18 | DATALOADER: 19 | SIZE_DIVISIBILITY: 32 20 | DATASETS: 21 | TRAIN: ("cityscapes_car_train_cocostyle",) 22 | SOURCE_TRAIN: ("cityscapes_car_train_cocostyle",) 23 | TARGET_TRAIN: ("kitti_cocostyle",) 24 | TEST: ("kitti_cocostyle",) 25 | INPUT: 26 | MIN_SIZE_TRAIN: (600,) 27 | MAX_SIZE_TRAIN: 1200 28 | MIN_SIZE_TEST: 600 29 | MAX_SIZE_TEST: 1200 30 | SOLVER: 31 | BASE_LR: 0.001 32 | WEIGHT_DECAY: 0.0005 33 | STEPS: (50000,) 34 | MAX_ITER: 70000 35 | IMS_PER_BATCH: 2 36 | TEST: 37 | IMS_PER_BATCH: 1 38 | -------------------------------------------------------------------------------- /detection/configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_SNR_kitti_to_cityscapes.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DOMAIN_ADAPTATION_ON: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-C4-SNR" 7 | RPN: 8 | PRE_NMS_TOP_N_TEST: 6000 9 | POST_NMS_TOP_N_TEST: 1000 10 | ROI_BOX_HEAD: 11 | NUM_CLASSES: 2 12 | ROI_HEADS: 13 | BATCH_SIZE_PER_IMAGE: 256 14 | NMS: 0.3 15 | DA_HEADS: 16 | DA_IMG_GRL_WEIGHT: 0.1 17 | DA_INS_GRL_WEIGHT: 0.1 18 | DATALOADER: 19 | SIZE_DIVISIBILITY: 32 20 | DATASETS: 21 | TRAIN: ("kitti_cocostyle",) 22 | 
SOURCE_TRAIN: ("kitti_cocostyle",) 23 | TARGET_TRAIN: ("cityscapes_car_train_cocostyle",) 24 | TEST: ("cityscapes_car_val_cocostyle",) 25 | INPUT: 26 | MIN_SIZE_TRAIN: (600,) 27 | MAX_SIZE_TRAIN: 1200 28 | MIN_SIZE_TEST: 600 29 | MAX_SIZE_TEST: 1200 30 | SOLVER: 31 | BASE_LR: 0.001 32 | WEIGHT_DECAY: 0.0005 33 | STEPS: (50000,) 34 | MAX_ITER: 70000 35 | IMS_PER_BATCH: 2 36 | TEST: 37 | IMS_PER_BATCH: 1 38 | -------------------------------------------------------------------------------- /detection/configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_SNR_kitti_to_cityscapes_source_only.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DOMAIN_ADAPTATION_ON: False 5 | BACKBONE: 6 | CONV_BODY: "R-50-C4-SNR" 7 | RPN: 8 | PRE_NMS_TOP_N_TEST: 6000 9 | POST_NMS_TOP_N_TEST: 1000 10 | ROI_BOX_HEAD: 11 | NUM_CLASSES: 2 12 | ROI_HEADS: 13 | BATCH_SIZE_PER_IMAGE: 256 14 | NMS: 0.3 15 | DA_HEADS: 16 | DA_IMG_GRL_WEIGHT: 0.1 17 | DA_INS_GRL_WEIGHT: 0.1 18 | DATALOADER: 19 | SIZE_DIVISIBILITY: 32 20 | DATASETS: 21 | TRAIN: ("kitti_cocostyle",) 22 | SOURCE_TRAIN: ("kitti_cocostyle",) 23 | TARGET_TRAIN: ("cityscapes_car_train_cocostyle",) 24 | TEST: ("cityscapes_car_val_cocostyle",) 25 | INPUT: 26 | MIN_SIZE_TRAIN: (600,) 27 | MAX_SIZE_TRAIN: 1200 28 | MIN_SIZE_TEST: 600 29 | MAX_SIZE_TEST: 1200 30 | SOLVER: 31 | BASE_LR: 0.001 32 | WEIGHT_DECAY: 0.0005 33 | STEPS: (50000,) 34 | MAX_ITER: 70000 35 | IMS_PER_BATCH: 2 36 | TEST: 37 | IMS_PER_BATCH: 1 38 | -------------------------------------------------------------------------------- /detection/configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_cityscapes_to_foggy_cityscapes.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DOMAIN_ADAPTATION_ON: True 
5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 6000 7 | POST_NMS_TOP_N_TEST: 1000 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 9 10 | ROI_HEADS: 11 | BATCH_SIZE_PER_IMAGE: 256 12 | NMS: 0.3 13 | DA_HEADS: 14 | DA_IMG_GRL_WEIGHT: 0.1 15 | DA_INS_GRL_WEIGHT: 0.1 16 | DATALOADER: 17 | SIZE_DIVISIBILITY: 32 18 | DATASETS: 19 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 20 | SOURCE_TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 21 | TARGET_TRAIN: ("foggy_cityscapes_fine_instanceonly_seg_train_cocostyle",) 22 | TEST: ("foggy_cityscapes_fine_instanceonly_seg_val_cocostyle",) 23 | INPUT: 24 | MIN_SIZE_TRAIN: (600,) 25 | MAX_SIZE_TRAIN: 1200 26 | MIN_SIZE_TEST: 600 27 | MAX_SIZE_TEST: 1200 28 | SOLVER: 29 | BASE_LR: 0.001 30 | WEIGHT_DECAY: 0.0005 31 | STEPS: (50000,) 32 | MAX_ITER: 70000 33 | IMS_PER_BATCH: 2 34 | TEST: 35 | IMS_PER_BATCH: 1 36 | -------------------------------------------------------------------------------- /detection/configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_cityscapes_to_foggy_cityscapes_source_only.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DOMAIN_ADAPTATION_ON: False 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 6000 7 | POST_NMS_TOP_N_TEST: 1000 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 9 10 | ROI_HEADS: 11 | BATCH_SIZE_PER_IMAGE: 256 12 | NMS: 0.3 13 | DA_HEADS: 14 | DA_IMG_GRL_WEIGHT: 0.1 15 | DA_INS_GRL_WEIGHT: 0.1 16 | DATALOADER: 17 | SIZE_DIVISIBILITY: 32 18 | DATASETS: 19 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 20 | SOURCE_TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 21 | TARGET_TRAIN: ("foggy_cityscapes_fine_instanceonly_seg_train_cocostyle",) 22 | TEST: ("foggy_cityscapes_fine_instanceonly_seg_val_cocostyle",) 23 | INPUT: 24 | MIN_SIZE_TRAIN: (600,) 25 | MAX_SIZE_TRAIN: 1200 26 | MIN_SIZE_TEST: 600 27 | MAX_SIZE_TEST: 1200 28 | SOLVER: 29 | BASE_LR: 0.001 
30 | WEIGHT_DECAY: 0.0005 31 | STEPS: (50000,) 32 | MAX_ITER: 70000 33 | IMS_PER_BATCH: 2 34 | TEST: 35 | IMS_PER_BATCH: 1 36 | -------------------------------------------------------------------------------- /detection/configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_cityscapes_to_kitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DOMAIN_ADAPTATION_ON: True 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 6000 7 | POST_NMS_TOP_N_TEST: 1000 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 2 10 | ROI_HEADS: 11 | BATCH_SIZE_PER_IMAGE: 256 12 | NMS: 0.3 13 | DA_HEADS: 14 | DA_IMG_GRL_WEIGHT: 0.1 15 | DA_INS_GRL_WEIGHT: 0.1 16 | DATALOADER: 17 | SIZE_DIVISIBILITY: 32 18 | DATASETS: 19 | TRAIN: ("cityscapes_car_train_cocostyle",) 20 | SOURCE_TRAIN: ("cityscapes_car_train_cocostyle",) 21 | TARGET_TRAIN: ("kitti_cocostyle",) 22 | TEST: ("kitti_cocostyle",) 23 | INPUT: 24 | MIN_SIZE_TRAIN: (600,) 25 | MAX_SIZE_TRAIN: 1200 26 | MIN_SIZE_TEST: 600 27 | MAX_SIZE_TEST: 1200 28 | SOLVER: 29 | BASE_LR: 0.001 30 | WEIGHT_DECAY: 0.0005 31 | STEPS: (50000,) 32 | MAX_ITER: 70000 33 | IMS_PER_BATCH: 2 34 | TEST: 35 | IMS_PER_BATCH: 1 36 | -------------------------------------------------------------------------------- /detection/configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_cityscapes_to_kitti_source_only.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DOMAIN_ADAPTATION_ON: False 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 6000 7 | POST_NMS_TOP_N_TEST: 1000 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 2 10 | ROI_HEADS: 11 | BATCH_SIZE_PER_IMAGE: 256 12 | NMS: 0.3 13 | DA_HEADS: 14 | DA_IMG_GRL_WEIGHT: 0.1 15 | DA_INS_GRL_WEIGHT: 0.1 16 | DATALOADER: 17 | SIZE_DIVISIBILITY: 32 18 | DATASETS: 19 | TRAIN: 
("cityscapes_car_train_cocostyle",) 20 | SOURCE_TRAIN: ("cityscapes_car_train_cocostyle",) 21 | TARGET_TRAIN: ("kitti_cocostyle",) 22 | TEST: ("kitti_cocostyle",) 23 | INPUT: 24 | MIN_SIZE_TRAIN: (600,) 25 | MAX_SIZE_TRAIN: 1200 26 | MIN_SIZE_TEST: 600 27 | MAX_SIZE_TEST: 1200 28 | SOLVER: 29 | BASE_LR: 0.001 30 | WEIGHT_DECAY: 0.0005 31 | STEPS: (50000,) 32 | MAX_ITER: 70000 33 | IMS_PER_BATCH: 2 34 | TEST: 35 | IMS_PER_BATCH: 1 36 | -------------------------------------------------------------------------------- /detection/configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_kitti_to_cityscapes.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DOMAIN_ADAPTATION_ON: True 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 6000 7 | POST_NMS_TOP_N_TEST: 1000 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 2 10 | ROI_HEADS: 11 | BATCH_SIZE_PER_IMAGE: 256 12 | NMS: 0.3 13 | DA_HEADS: 14 | DA_IMG_GRL_WEIGHT: 0.1 15 | DA_INS_GRL_WEIGHT: 0.1 16 | DATALOADER: 17 | SIZE_DIVISIBILITY: 32 18 | DATASETS: 19 | TRAIN: ("kitti_cocostyle",) 20 | SOURCE_TRAIN: ("kitti_cocostyle",) 21 | TARGET_TRAIN: ("cityscapes_car_train_cocostyle",) 22 | TEST: ("cityscapes_car_val_cocostyle",) 23 | INPUT: 24 | MIN_SIZE_TRAIN: (600,) 25 | MAX_SIZE_TRAIN: 1200 26 | MIN_SIZE_TEST: 600 27 | MAX_SIZE_TEST: 1200 28 | SOLVER: 29 | BASE_LR: 0.001 30 | WEIGHT_DECAY: 0.0005 31 | STEPS: (50000,) 32 | MAX_ITER: 70000 33 | IMS_PER_BATCH: 2 34 | TEST: 35 | IMS_PER_BATCH: 1 36 | -------------------------------------------------------------------------------- /detection/configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_kitti_to_cityscapes_source_only.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DOMAIN_ADAPTATION_ON: False 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 
6000 7 | POST_NMS_TOP_N_TEST: 1000 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 2 10 | ROI_HEADS: 11 | BATCH_SIZE_PER_IMAGE: 256 12 | NMS: 0.3 13 | DA_HEADS: 14 | DA_IMG_GRL_WEIGHT: 0.1 15 | DA_INS_GRL_WEIGHT: 0.1 16 | DATALOADER: 17 | SIZE_DIVISIBILITY: 32 18 | DATASETS: 19 | TRAIN: ("kitti_cocostyle",) 20 | SOURCE_TRAIN: ("kitti_cocostyle",) 21 | TARGET_TRAIN: ("cityscapes_car_train_cocostyle",) 22 | TEST: ("cityscapes_car_val_cocostyle",) 23 | INPUT: 24 | MIN_SIZE_TRAIN: (600,) 25 | MAX_SIZE_TRAIN: 1200 26 | MIN_SIZE_TEST: 600 27 | MAX_SIZE_TEST: 1200 28 | SOLVER: 29 | BASE_LR: 0.001 30 | WEIGHT_DECAY: 0.0005 31 | STEPS: (50000,) 32 | MAX_ITER: 70000 33 | IMS_PER_BATCH: 2 34 | TEST: 35 | IMS_PER_BATCH: 1 36 | -------------------------------------------------------------------------------- /detection/configs/da_faster_rcnn/e2e_da_faster_rcnn_R_50_C4_sim10k.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | DOMAIN_ADAPTATION_ON: True 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 6000 7 | POST_NMS_TOP_N_TEST: 1000 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 2 10 | ROI_HEADS: 11 | BATCH_SIZE_PER_IMAGE: 256 12 | NMS: 0.3 13 | DA_HEADS: 14 | DA_IMG_GRL_WEIGHT: 0.1 15 | DA_INS_GRL_WEIGHT: 0.1 16 | DATALOADER: 17 | SIZE_DIVISIBILITY: 32 18 | DATASETS: 19 | TRAIN: ("sim10k_cocostyle",) 20 | SOURCE_TRAIN: ("sim10k_cocostyle",) 21 | TARGET_TRAIN: ("cityscapes_car_train_cocostyle",) 22 | TEST: ("cityscapes_car_val_cocostyle",) 23 | INPUT: 24 | MIN_SIZE_TRAIN: (600,) 25 | MAX_SIZE_TRAIN: 1200 26 | MIN_SIZE_TEST: 600 27 | MAX_SIZE_TEST: 1200 28 | SOLVER: 29 | BASE_LR: 0.001 30 | WEIGHT_DECAY: 0.0005 31 | STEPS: (50000,) 32 | MAX_ITER: 70000 33 | IMS_PER_BATCH: 2 34 | TEST: 35 | IMS_PER_BATCH: 1 36 | -------------------------------------------------------------------------------- /detection/configs/dcn/README.md: 
-------------------------------------------------------------------------------- 1 | ### Reference 2 | 1 [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/pdf/1811.11168.pdf) 3 | 2 third-party: [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn) 4 | 5 | ### Performance 6 | | case | bbox AP | mask AP | 7 | |----------------------------:|--------:|:-------:| 8 | | R-50-FPN-dcn (implement) | 39.8 | - | 9 | | R-50-FPN-dcn (mmdetection) | 40.0 | - | 10 | | R-50-FPN-mdcn (implement) | 40.0 | - | 11 | | R-50-FPN-mdcn (mmdetection) | 40.3 | - | 12 | | R-50-FPN-dcn (implement) | 40.8 | 36.8 | 13 | | R-50-FPN-dcn (mmdetection) | 41.1 | 37.2 | 14 | | R-50-FPN-mdcn (implement) | 40.7 | 36.7 | 15 | | R-50-FPN-mdcn (mmdetection) | 41.4 | 37.4 | 16 | 17 | 18 | ### Note 19 | see [dcn-v2](https://github.com/open-mmlab/mmdetection/blob/master/MODEL_ZOO.md#deformable-convolution-v2) in `mmdetection` for more details. 20 | 21 | 22 | ### Usage 23 | add these three lines 24 | ``` 25 | MODEL: 26 | RESNETS: 27 | # corresponding to C2,C3,C4,C5 28 | STAGE_WITH_DCN: (False, True, True, True) 29 | WITH_MODULATED_DCN: True 30 | DEFORMABLE_GROUPS: 1 31 | ``` -------------------------------------------------------------------------------- /detection/configs/dcn/e2e_faster_rcnn_dconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: False 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 |
FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 8 gpus 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | TEST: 44 | IMS_PER_BATCH: 8 45 | -------------------------------------------------------------------------------- /detection/configs/dcn/e2e_faster_rcnn_mdconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: True 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 8 gpus 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | TEST: 44 
| IMS_PER_BATCH: 8 45 | -------------------------------------------------------------------------------- /detection/configs/dcn/e2e_mask_rcnn_dconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: False 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | ROI_MASK_HEAD: 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 34 | PREDICTOR: "MaskRCNNC4Predictor" 35 | POOLER_RESOLUTION: 14 36 | POOLER_SAMPLING_RATIO: 2 37 | RESOLUTION: 28 38 | SHARE_BOX_FEATURE_EXTRACTOR: False 39 | MASK_ON: True 40 | DATASETS: 41 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 42 | TEST: ("coco_2014_minival",) 43 | DATALOADER: 44 | SIZE_DIVISIBILITY: 32 45 | SOLVER: 46 | # Assume 8 gpus 47 | BASE_LR: 0.02 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (60000, 80000) 50 | MAX_ITER: 90000 51 | IMS_PER_BATCH: 16 52 | TEST: 53 | IMS_PER_BATCH: 8 54 | 55 | -------------------------------------------------------------------------------- /detection/configs/dcn/e2e_mask_rcnn_mdconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 
| MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: True 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | ROI_MASK_HEAD: 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 34 | PREDICTOR: "MaskRCNNC4Predictor" 35 | POOLER_RESOLUTION: 14 36 | POOLER_SAMPLING_RATIO: 2 37 | RESOLUTION: 28 38 | SHARE_BOX_FEATURE_EXTRACTOR: False 39 | MASK_ON: True 40 | DATASETS: 41 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 42 | TEST: ("coco_2014_minival",) 43 | DATALOADER: 44 | SIZE_DIVISIBILITY: 32 45 | SOLVER: 46 | # Assume 8 gpus 47 | BASE_LR: 0.02 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (60000, 80000) 50 | MAX_ITER: 90000 51 | IMS_PER_BATCH: 16 52 | TEST: 53 | IMS_PER_BATCH: 8 54 | -------------------------------------------------------------------------------- /detection/configs/e2e_faster_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | 
FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 23 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | SOLVER: 28 | BASE_LR: 0.02 29 | WEIGHT_DECAY: 0.0001 30 | STEPS: (60000, 80000) 31 | MAX_ITER: 90000 32 | -------------------------------------------------------------------------------- /detection/configs/e2e_faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | DATASETS: 8 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 9 | TEST: ("coco_2014_minival",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WEIGHT_DECAY: 0.0001 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | IMS_PER_BATCH: 8 16 | -------------------------------------------------------------------------------- /detection/configs/e2e_faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 
23 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | SOLVER: 28 | BASE_LR: 0.02 29 | WEIGHT_DECAY: 0.0001 30 | STEPS: (60000, 80000) 31 | MAX_ITER: 90000 32 | -------------------------------------------------------------------------------- /detection/configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | RESNETS: 23 | STRIDE_IN_1X1: False 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.01 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (120000, 160000) 35 | MAX_ITER: 180000 36 | IMS_PER_BATCH: 8 37 | -------------------------------------------------------------------------------- /detection/configs/e2e_faster_rcnn_fbnet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | 
PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 100 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 512 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.06 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (60000, 80000) 34 | MAX_ITER: 90000 35 | IMS_PER_BATCH: 128 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (320, ) 40 | MAX_SIZE_TRAIN: 640 41 | MIN_SIZE_TEST: 320 42 | MAX_SIZE_TEST: 640 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /detection/configs/e2e_faster_rcnn_fbnet_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 200 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.06 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (60000, 80000) 34 | MAX_ITER: 90000 35 | IMS_PER_BATCH: 128 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | 
MIN_SIZE_TRAIN: (600, ) 40 | MAX_SIZE_TRAIN: 1000 41 | MIN_SIZE_TEST: 600 42 | MAX_SIZE_TEST: 1000 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /detection/configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "cham_v1a" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 200 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 128 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.045 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (90000, 120000) 34 | MAX_ITER: 135000 35 | IMS_PER_BATCH: 96 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (600, ) 40 | MAX_SIZE_TRAIN: 1000 41 | MIN_SIZE_TEST: 600 42 | MAX_SIZE_TEST: 1000 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /detection/configs/e2e_keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 
32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | NUM_CLASSES: 2 23 | ROI_KEYPOINT_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor" 26 | PREDICTOR: "KeypointRCNNPredictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 56 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | KEYPOINT_ON: True 32 | DATASETS: 33 | TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",) 34 | TEST: ("keypoints_coco_2014_minival",) 35 | INPUT: 36 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | SOLVER: 40 | BASE_LR: 0.02 41 | WEIGHT_DECAY: 0.0001 42 | STEPS: (60000, 80000) 43 | MAX_ITER: 90000 44 | -------------------------------------------------------------------------------- /detection/configs/e2e_mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: 
"MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | BASE_LR: 0.02 38 | WEIGHT_DECAY: 0.0001 39 | STEPS: (60000, 80000) 40 | MAX_ITER: 90000 41 | -------------------------------------------------------------------------------- /detection/configs/e2e_mask_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_MASK_HEAD: 8 | PREDICTOR: "MaskRCNNC4Predictor" 9 | SHARE_BOX_FEATURE_EXTRACTOR: True 10 | MASK_ON: True 11 | DATASETS: 12 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 13 | TEST: ("coco_2014_minival",) 14 | SOLVER: 15 | BASE_LR: 0.01 16 | WEIGHT_DECAY: 0.0001 17 | STEPS: (120000, 160000) 18 | MAX_ITER: 180000 19 | IMS_PER_BATCH: 8 20 | -------------------------------------------------------------------------------- /detection/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 
| PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | BASE_LR: 0.02 38 | WEIGHT_DECAY: 0.0001 39 | STEPS: (60000, 80000) 40 | MAX_ITER: 90000 41 | -------------------------------------------------------------------------------- /detection/configs/e2e_mask_rcnn_R_50_FPN_1x_periodically_testing.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 34 | TEST: ("coco_2014_minival",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | 
MAX_ITER: 90000 42 | TEST_PERIOD: 2500 43 | -------------------------------------------------------------------------------- /detection/configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | RESNETS: 31 | STRIDE_IN_1X1: False 32 | NUM_GROUPS: 32 33 | WIDTH_PER_GROUP: 8 34 | MASK_ON: True 35 | DATASETS: 36 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 37 | TEST: ("coco_2014_minival",) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.01 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (120000, 160000) 44 | MAX_ITER: 180000 45 | IMS_PER_BATCH: 8 46 | -------------------------------------------------------------------------------- /detection/configs/e2e_mask_rcnn_fbnet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 
0.0 12 | RPN: 13 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 100 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (320, ) 48 | MAX_SIZE_TRAIN: 640 49 | MIN_SIZE_TEST: 320 50 | MAX_SIZE_TEST: 640 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /detection/configs/e2e_mask_rcnn_fbnet_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 200 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | 
NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (600, ) 48 | MAX_SIZE_TRAIN: 1000 49 | MIN_SIZE_TEST: 600 50 | MAX_SIZE_TEST: 1000 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /detection/configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "xirb16d_dsmask" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: -1.0 12 | RPN: 13 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 100 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 512 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | 
WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (320, ) 48 | MAX_SIZE_TRAIN: 640 49 | MIN_SIZE_TEST: 320 50 | MAX_SIZE_TEST: 640 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /detection/configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "xirb16d_dsmask" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 200 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (600, ) 48 | MAX_SIZE_TRAIN: 1000 49 | MIN_SIZE_TEST: 600 50 | MAX_SIZE_TEST: 1000 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | 
-------------------------------------------------------------------------------- /detection/configs/gn_baselines/README.md: -------------------------------------------------------------------------------- 1 | ### Group Normalization 2 | 1 [Group Normalization](https://arxiv.org/abs/1803.08494) 3 | 2 [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883) 4 | 3 [official code](https://github.com/facebookresearch/Detectron/blob/master/projects/GN/README.md) 5 | 6 | 7 | ### Performance 8 | | case | Type | lr schd | im/gpu | bbox AP | mask AP | 9 | |----------------------------|:------------:|:---------:|:-------:|:-------:|:-------:| 10 | | R-50-FPN, GN (paper) | finetune | 2x | 2 | 40.3 | 35.7 | 11 | | R-50-FPN, GN (this implementation) | finetune | 2x | 2 | 40.2 | 36.0 | 12 | | R-50-FPN, GN (paper) | from scratch | 3x | 2 | 39.5 | 35.2 | 13 | | R-50-FPN, GN (this implementation) | from scratch | 3x | 2 | 38.9 | 35.1 | 14 | -------------------------------------------------------------------------------- /detection/configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | RESNETS: # use GN for backbone 13 | TRANS_FUNC: "BottleneckWithGN" 14 | STEM_FUNC: "StemWithGN" 15 | FPN: 16 | USE_GN: True # use GN for FPN 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | BATCH_SIZE_PER_IMAGE: 512 27 | POSITIVE_FRACTION: 0.25 28 | ROI_BOX_HEAD: 29 | USE_GN: True # use GN for bbox head 30 | POOLER_RESOLUTION: 7 31 | POOLER_SCALES: (0.25, 0.125, 
0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 34 | PREDICTOR: "FPNPredictor" 35 | DATASETS: 36 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 37 | TEST: ("coco_2014_minival",) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | # Assume 8 gpus 42 | BASE_LR: 0.02 43 | WEIGHT_DECAY: 0.0001 44 | STEPS: (60000, 80000) 45 | MAX_ITER: 90000 46 | IMS_PER_BATCH: 16 47 | TEST: 48 | IMS_PER_BATCH: 8 49 | -------------------------------------------------------------------------------- /detection/configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | RESNETS: # use GN for backbone 13 | TRANS_FUNC: "BottleneckWithGN" 14 | STEM_FUNC: "StemWithGN" 15 | FPN: 16 | USE_GN: True # use GN for FPN 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | BATCH_SIZE_PER_IMAGE: 512 27 | POSITIVE_FRACTION: 0.25 28 | ROI_BOX_HEAD: 29 | USE_GN: True # use GN for bbox head 30 | POOLER_RESOLUTION: 7 31 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | CONV_HEAD_DIM: 256 34 | NUM_STACKED_CONVS: 4 35 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 36 | PREDICTOR: "FPNPredictor" 37 | DATASETS: 38 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 39 | TEST: ("coco_2014_minival",) 40 | DATALOADER: 41 | SIZE_DIVISIBILITY: 32 42 | SOLVER: 43 | # Assume 8 gpus 44 | BASE_LR: 0.02 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (60000, 80000) 47 | 
MAX_ITER: 90000 48 | IMS_PER_BATCH: 16 49 | TEST: 50 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /detection/configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | RESNETS: # use GN for backbone 13 | TRANS_FUNC: "BottleneckWithGN" 14 | STEM_FUNC: "StemWithGN" 15 | FPN: 16 | USE_GN: True # use GN for FPN 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | BATCH_SIZE_PER_IMAGE: 512 27 | POSITIVE_FRACTION: 0.25 28 | ROI_BOX_HEAD: 29 | USE_GN: True # use GN for bbox head 30 | POOLER_RESOLUTION: 7 31 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 34 | PREDICTOR: "FPNPredictor" 35 | ROI_MASK_HEAD: 36 | USE_GN: True # use GN for mask head 37 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 38 | CONV_LAYERS: (256, 256, 256, 256) 39 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 40 | PREDICTOR: "MaskRCNNC4Predictor" 41 | POOLER_RESOLUTION: 14 42 | POOLER_SAMPLING_RATIO: 2 43 | RESOLUTION: 28 44 | SHARE_BOX_FEATURE_EXTRACTOR: False 45 | MASK_ON: True 46 | DATASETS: 47 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 48 | TEST: ("coco_2014_minival",) 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | # Assume 8 gpus 53 | BASE_LR: 0.02 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (60000, 80000) 56 | MAX_ITER: 90000 57 | IMS_PER_BATCH: 16 58 | TEST: 59 | IMS_PER_BATCH: 8 
-------------------------------------------------------------------------------- /detection/configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | RESNETS: # use GN for backbone 13 | TRANS_FUNC: "BottleneckWithGN" 14 | STEM_FUNC: "StemWithGN" 15 | FPN: 16 | USE_GN: True # use GN for FPN 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | BATCH_SIZE_PER_IMAGE: 512 27 | POSITIVE_FRACTION: 0.25 28 | ROI_BOX_HEAD: 29 | USE_GN: True # use GN for bbox head 30 | POOLER_RESOLUTION: 7 31 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | CONV_HEAD_DIM: 256 34 | NUM_STACKED_CONVS: 4 35 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 36 | PREDICTOR: "FPNPredictor" 37 | ROI_MASK_HEAD: 38 | USE_GN: True # use GN for mask head 39 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 40 | CONV_LAYERS: (256, 256, 256, 256) 41 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 42 | PREDICTOR: "MaskRCNNC4Predictor" 43 | POOLER_RESOLUTION: 14 44 | POOLER_SAMPLING_RATIO: 2 45 | RESOLUTION: 28 46 | SHARE_BOX_FEATURE_EXTRACTOR: False 47 | MASK_ON: True 48 | DATASETS: 49 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 50 | TEST: ("coco_2014_minival",) 51 | DATALOADER: 52 | SIZE_DIVISIBILITY: 32 53 | SOLVER: 54 | # Assume 8 gpus 55 | BASE_LR: 0.02 56 | WEIGHT_DECAY: 0.0001 57 | STEPS: (60000, 80000) 58 | MAX_ITER: 90000 59 | IMS_PER_BATCH: 16 60 | TEST: 61 | IMS_PER_BATCH: 8 
-------------------------------------------------------------------------------- /detection/configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 13 | RESNETS: # use GN for backbone 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | DATASETS: 37 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 38 | TEST: ("coco_2014_minival",) 39 | DATALOADER: 40 | SIZE_DIVISIBILITY: 32 41 | SOLVER: 42 | # Assume 8 gpus 43 | BASE_LR: 0.02 44 | WEIGHT_DECAY: 0.0001 45 | STEPS: (210000, 250000) 46 | MAX_ITER: 270000 47 | IMS_PER_BATCH: 16 48 | TEST: 49 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /detection/configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no 
pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 13 | RESNETS: # use GN for backbone 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | DATASETS: 39 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 40 | TEST: ("coco_2014_minival",) 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 8 gpus 45 | BASE_LR: 0.02 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (210000, 250000) 48 | MAX_ITER: 270000 49 | IMS_PER_BATCH: 16 50 | TEST: 51 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /detection/configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 13 | RESNETS: # use GN for backbone 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 
2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | ROI_MASK_HEAD: 37 | USE_GN: True # use GN for mask head 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | CONV_LAYERS: (256, 256, 256, 256) 40 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 41 | PREDICTOR: "MaskRCNNC4Predictor" 42 | POOLER_RESOLUTION: 14 43 | POOLER_SAMPLING_RATIO: 2 44 | RESOLUTION: 28 45 | SHARE_BOX_FEATURE_EXTRACTOR: False 46 | MASK_ON: True 47 | DATASETS: 48 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 49 | TEST: ("coco_2014_minival",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | # Assume 8 gpus 54 | BASE_LR: 0.02 55 | WEIGHT_DECAY: 0.0001 56 | STEPS: (210000, 250000) 57 | MAX_ITER: 270000 58 | IMS_PER_BATCH: 16 59 | TEST: 60 | IMS_PER_BATCH: 8 61 | -------------------------------------------------------------------------------- /detection/configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 13 | RESNETS: # use GN for backbone 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 
23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | ROI_MASK_HEAD: 39 | USE_GN: True # use GN for mask head 40 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 41 | CONV_LAYERS: (256, 256, 256, 256) 42 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 43 | PREDICTOR: "MaskRCNNC4Predictor" 44 | POOLER_RESOLUTION: 14 45 | POOLER_SAMPLING_RATIO: 2 46 | RESOLUTION: 28 47 | SHARE_BOX_FEATURE_EXTRACTOR: False 48 | MASK_ON: True 49 | DATASETS: 50 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 51 | TEST: ("coco_2014_minival",) 52 | DATALOADER: 53 | SIZE_DIVISIBILITY: 32 54 | SOLVER: 55 | # Assume 8 gpus 56 | BASE_LR: 0.02 57 | WEIGHT_DECAY: 0.0001 58 | STEPS: (210000, 250000) 59 | MAX_ITER: 270000 60 | IMS_PER_BATCH: 16 61 | TEST: 62 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /detection/configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 300 7 | ANCHOR_SIZES: (128, 256, 512) 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 21 10 | DATASETS: 11 | TRAIN: ("voc_2007_train", "voc_2007_val") 12 | TEST: ("voc_2007_test",) 13 | SOLVER: 14 | BASE_LR: 0.001 15 | WEIGHT_DECAY: 0.0001 16 | STEPS: (50000, ) 17 | MAX_ITER: 70000 18 | IMS_PER_BATCH: 1 19 | TEST: 20 | IMS_PER_BATCH: 1 21 | -------------------------------------------------------------------------------- 
/detection/configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 300 7 | ANCHOR_SIZES: (128, 256, 512) 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 21 10 | DATASETS: 11 | TRAIN: ("voc_2007_train", "voc_2007_val") 12 | TEST: ("voc_2007_test",) 13 | SOLVER: 14 | BASE_LR: 0.004 15 | WEIGHT_DECAY: 0.0001 16 | STEPS: (12500, ) 17 | MAX_ITER: 17500 18 | IMS_PER_BATCH: 4 19 | TEST: 20 | IMS_PER_BATCH: 4 21 | -------------------------------------------------------------------------------- /detection/configs/pascal_voc/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | NUM_CLASSES: 21 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("voc_2012_train_cocostyle",) 34 | TEST: ("voc_2012_val_cocostyle",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 
0.0001 40 | STEPS: (18000,) 41 | MAX_ITER: 24000 42 | -------------------------------------------------------------------------------- /detection/configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | DATASETS: 10 | TRAIN: ("coco_2014_minival",) 11 | TEST: ("coco_2014_minival",) 12 | INPUT: 13 | MIN_SIZE_TRAIN: (600,) 14 | MAX_SIZE_TRAIN: 1000 15 | MIN_SIZE_TEST: 800 16 | MAX_SIZE_TEST: 1000 17 | SOLVER: 18 | BASE_LR: 0.005 19 | WEIGHT_DECAY: 0.0001 20 | STEPS: (1500,) 21 | MAX_ITER: 2000 22 | IMS_PER_BATCH: 2 23 | TEST: 24 | IMS_PER_BATCH: 2 25 | -------------------------------------------------------------------------------- /detection/configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | BATCH_SIZE_PER_IMAGE: 256 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TRAIN: ("coco_2014_minival",) 25 | TEST: ("coco_2014_minival",) 26 | INPUT: 27 | MIN_SIZE_TRAIN: (600,) 28 | MAX_SIZE_TRAIN: 1000 29 | MIN_SIZE_TEST: 800 30 | MAX_SIZE_TEST: 1000 31 | DATALOADER: 32 | SIZE_DIVISIBILITY: 32 33 | SOLVER: 34 | BASE_LR: 
0.005 35 | WEIGHT_DECAY: 0.0001 36 | STEPS: (1500,) 37 | MAX_ITER: 2000 38 | IMS_PER_BATCH: 4 39 | TEST: 40 | IMS_PER_BATCH: 2 41 | -------------------------------------------------------------------------------- /detection/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | BATCH_SIZE_PER_IMAGE: 256 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | RESNETS: 24 | STRIDE_IN_1X1: False 25 | NUM_GROUPS: 32 26 | WIDTH_PER_GROUP: 8 27 | DATASETS: 28 | TRAIN: ("coco_2014_minival",) 29 | TEST: ("coco_2014_minival",) 30 | INPUT: 31 | MIN_SIZE_TRAIN: (600,) 32 | MAX_SIZE_TRAIN: 1000 33 | MIN_SIZE_TEST: 800 34 | MAX_SIZE_TEST: 1000 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.005 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (1500,) 41 | MAX_ITER: 2000 42 | IMS_PER_BATCH: 2 43 | TEST: 44 | IMS_PER_BATCH: 2 45 | -------------------------------------------------------------------------------- /detection/configs/quick_schedules/e2e_keypoint_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | 
PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | BATCH_SIZE_PER_IMAGE: 256 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 2 24 | ROI_KEYPOINT_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor" 27 | PREDICTOR: "KeypointRCNNPredictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 56 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | KEYPOINT_ON: True 33 | DATASETS: 34 | TRAIN: ("keypoints_coco_2014_minival",) 35 | TEST: ("keypoints_coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 38 | MAX_SIZE_TRAIN: 1000 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1000 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | BASE_LR: 0.005 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (1500,) 47 | MAX_ITER: 2000 48 | IMS_PER_BATCH: 4 49 | TEST: 50 | IMS_PER_BATCH: 2 51 | -------------------------------------------------------------------------------- /detection/configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | ROI_MASK_HEAD: 10 | PREDICTOR: "MaskRCNNC4Predictor" 11 | SHARE_BOX_FEATURE_EXTRACTOR: True 12 | MASK_ON: True 13 | DATASETS: 14 | TRAIN: ("coco_2014_minival",) 15 | TEST: ("coco_2014_minival",) 16 | INPUT: 17 | MIN_SIZE_TRAIN: (600,) 18 | MAX_SIZE_TRAIN: 1000 19 | MIN_SIZE_TEST: 800 20 | MAX_SIZE_TEST: 1000 21 | SOLVER: 22 | BASE_LR: 0.005 23 | WEIGHT_DECAY: 0.0001 24 | 
STEPS: (1500,) 25 | MAX_ITER: 2000 26 | IMS_PER_BATCH: 4 27 | TEST: 28 | IMS_PER_BATCH: 2 29 | -------------------------------------------------------------------------------- /detection/configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | BATCH_SIZE_PER_IMAGE: 256 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2014_minival",) 34 | TEST: ("coco_2014_minival",) 35 | INPUT: 36 | MIN_SIZE_TRAIN: (600,) 37 | MAX_SIZE_TRAIN: 1000 38 | MIN_SIZE_TEST: 800 39 | MAX_SIZE_TEST: 1000 40 | DATALOADER: 41 | SIZE_DIVISIBILITY: 32 42 | SOLVER: 43 | BASE_LR: 0.005 44 | WEIGHT_DECAY: 0.0001 45 | STEPS: (1500,) 46 | MAX_ITER: 2000 47 | IMS_PER_BATCH: 4 48 | TEST: 49 | IMS_PER_BATCH: 2 50 | -------------------------------------------------------------------------------- /detection/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: 
"catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | BATCH_SIZE_PER_IMAGE: 256 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | RESNETS: 32 | STRIDE_IN_1X1: False 33 | NUM_GROUPS: 32 34 | WIDTH_PER_GROUP: 8 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2014_minival",) 38 | TEST: ("coco_2014_minival",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (600,) 41 | MAX_SIZE_TRAIN: 1000 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1000 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | BASE_LR: 0.005 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (1500,) 50 | MAX_ITER: 2000 51 | IMS_PER_BATCH: 2 52 | TEST: 53 | IMS_PER_BATCH: 2 54 | -------------------------------------------------------------------------------- /detection/configs/quick_schedules/rpn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 12000 7 | POST_NMS_TOP_N_TEST: 2000 8 | DATASETS: 9 | TRAIN: ("coco_2014_minival",) 10 | TEST: ("coco_2014_minival",) 11 | INPUT: 12 | MIN_SIZE_TRAIN: (600,) 13 | MAX_SIZE_TRAIN: 1000 14 | MIN_SIZE_TEST: 800 15 | MAX_SIZE_TEST: 1000 16 | SOLVER: 17 | 
BASE_LR: 0.005 18 | WEIGHT_DECAY: 0.0001 19 | STEPS: (1500,) 20 | MAX_ITER: 2000 21 | IMS_PER_BATCH: 4 22 | TEST: 23 | IMS_PER_BATCH: 2 24 | -------------------------------------------------------------------------------- /detection/configs/quick_schedules/rpn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 2000 13 | FPN_POST_NMS_TOP_N_TEST: 2000 14 | DATASETS: 15 | TRAIN: ("coco_2014_minival",) 16 | TEST: ("coco_2014_minival",) 17 | INPUT: 18 | MIN_SIZE_TRAIN: (600,) 19 | MAX_SIZE_TRAIN: 1000 20 | MIN_SIZE_TEST: 800 21 | MAX_SIZE_TEST: 1000 22 | DATALOADER: 23 | SIZE_DIVISIBILITY: 32 24 | SOLVER: 25 | BASE_LR: 0.005 26 | WEIGHT_DECAY: 0.0001 27 | STEPS: (1500,) 28 | MAX_ITER: 2000 29 | IMS_PER_BATCH: 4 30 | TEST: 31 | IMS_PER_BATCH: 2 32 | -------------------------------------------------------------------------------- /detection/configs/retinanet/retinanet_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | FG_IOU_THRESHOLD: 0.5 12 | BG_IOU_THRESHOLD: 0.4 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | BATCH_SIZE_PER_IMAGE: 256 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | 
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | RETINANET: 28 | SCALES_PER_OCTAVE: 3 29 | STRADDLE_THRESH: -1 30 | FG_IOU_THRESHOLD: 0.5 31 | BG_IOU_THRESHOLD: 0.4 32 | DATASETS: 33 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 34 | TEST: ("coco_2014_minival",) 35 | INPUT: 36 | MIN_SIZE_TRAIN: (800, ) 37 | MAX_SIZE_TRAIN: 1333 38 | MIN_SIZE_TEST: 800 39 | MAX_SIZE_TEST: 1333 40 | DATALOADER: 41 | SIZE_DIVISIBILITY: 32 42 | SOLVER: 43 | # Assume 4 gpus 44 | BASE_LR: 0.005 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (120000, 160000) 47 | MAX_ITER: 180000 48 | IMS_PER_BATCH: 8 49 | -------------------------------------------------------------------------------- /detection/configs/retinanet/retinanet_R-101-FPN_P5_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | FG_IOU_THRESHOLD: 0.5 12 | BG_IOU_THRESHOLD: 0.4 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | BATCH_SIZE_PER_IMAGE: 256 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | RETINANET: 28 | SCALES_PER_OCTAVE: 3 29 | STRADDLE_THRESH: -1 30 | USE_C5: False 31 | FG_IOU_THRESHOLD: 0.5 32 | BG_IOU_THRESHOLD: 0.4 33 | DATASETS: 34 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 35 | TEST: ("coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (800, ) 38 | MAX_SIZE_TRAIN: 1333 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1333 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 
43 | SOLVER: 44 | # Assume 4 gpus 45 | BASE_LR: 0.005 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (120000, 160000) 48 | MAX_ITER: 180000 49 | IMS_PER_BATCH: 8 50 | -------------------------------------------------------------------------------- /detection/configs/retinanet/retinanet_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN-RETINANET" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | FG_IOU_THRESHOLD: 0.5 12 | BG_IOU_THRESHOLD: 0.4 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | BATCH_SIZE_PER_IMAGE: 256 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | RETINANET: 28 | SCALES_PER_OCTAVE: 3 29 | STRADDLE_THRESH: -1 30 | FG_IOU_THRESHOLD: 0.5 31 | BG_IOU_THRESHOLD: 0.4 32 | DATASETS: 33 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 34 | TEST: ("coco_2014_minival",) 35 | INPUT: 36 | MIN_SIZE_TRAIN: (800,) 37 | MAX_SIZE_TRAIN: 1333 38 | MIN_SIZE_TEST: 800 39 | MAX_SIZE_TEST: 1333 40 | DATALOADER: 41 | SIZE_DIVISIBILITY: 32 42 | SOLVER: 43 | # Assume 4 gpus 44 | BASE_LR: 0.005 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (120000, 160000) 47 | MAX_ITER: 180000 48 | IMS_PER_BATCH: 8 49 | -------------------------------------------------------------------------------- /detection/configs/retinanet/retinanet_R-50-FPN_1x_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | 
RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN-RETINANET" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | FG_IOU_THRESHOLD: 0.5 12 | BG_IOU_THRESHOLD: 0.4 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | BATCH_SIZE_PER_IMAGE: 256 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | RETINANET: 28 | SCALES_PER_OCTAVE: 3 29 | STRADDLE_THRESH: -1 30 | FG_IOU_THRESHOLD: 0.5 31 | BG_IOU_THRESHOLD: 0.4 32 | DATASETS: 33 | TRAIN: ("coco_2014_minival",) 34 | TEST: ("coco_2014_minival",) 35 | INPUT: 36 | MIN_SIZE_TRAIN: (600,) 37 | MAX_SIZE_TRAIN: 1000 38 | MIN_SIZE_TEST: 800 39 | MAX_SIZE_TEST: 1000 40 | DATALOADER: 41 | SIZE_DIVISIBILITY: 32 42 | SOLVER: 43 | BASE_LR: 0.005 44 | WEIGHT_DECAY: 0.0001 45 | STEPS: (3500,) 46 | MAX_ITER: 4000 47 | IMS_PER_BATCH: 4 48 | -------------------------------------------------------------------------------- /detection/configs/retinanet/retinanet_R-50-FPN_P5_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN-RETINANET" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | FG_IOU_THRESHOLD: 0.5 12 | BG_IOU_THRESHOLD: 0.4 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | BATCH_SIZE_PER_IMAGE: 256 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 
2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | RETINANET: 28 | SCALES_PER_OCTAVE: 3 29 | STRADDLE_THRESH: -1 30 | USE_C5: False 31 | FG_IOU_THRESHOLD: 0.5 32 | BG_IOU_THRESHOLD: 0.4 33 | DATASETS: 34 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 35 | TEST: ("coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (800,) 38 | MAX_SIZE_TRAIN: 1333 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1333 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 4 gpus 45 | BASE_LR: 0.005 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (120000, 160000) 48 | MAX_ITER: 180000 49 | IMS_PER_BATCH: 8 50 | -------------------------------------------------------------------------------- /detection/configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | FG_IOU_THRESHOLD: 0.5 12 | BG_IOU_THRESHOLD: 0.4 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | BATCH_SIZE_PER_IMAGE: 256 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | RESNETS: 28 | STRIDE_IN_1X1: False 29 | NUM_GROUPS: 32 30 | WIDTH_PER_GROUP: 8 31 | RETINANET: 32 | SCALES_PER_OCTAVE: 3 33 | STRADDLE_THRESH: -1 34 | FG_IOU_THRESHOLD: 0.5 35 | BG_IOU_THRESHOLD: 0.4 36 | DATASETS: 37 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 38 | TEST: ("coco_2014_minival",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800, ) 41 | 
MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.0025 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (240000, 320000) 51 | MAX_ITER: 360000 52 | IMS_PER_BATCH: 4 53 | -------------------------------------------------------------------------------- /detection/configs/rpn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 2000 13 | FPN_POST_NMS_TOP_N_TEST: 2000 14 | DATASETS: 15 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 16 | TEST: ("coco_2014_minival",) 17 | DATALOADER: 18 | SIZE_DIVISIBILITY: 32 19 | SOLVER: 20 | BASE_LR: 0.02 21 | WEIGHT_DECAY: 0.0001 22 | STEPS: (60000, 80000) 23 | MAX_ITER: 90000 24 | -------------------------------------------------------------------------------- /detection/configs/rpn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 12000 7 | POST_NMS_TOP_N_TEST: 2000 8 | DATASETS: 9 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 10 | TEST: ("coco_2014_minival",) 11 | SOLVER: 12 | BASE_LR: 0.02 13 | WEIGHT_DECAY: 0.0001 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | -------------------------------------------------------------------------------- /detection/configs/rpn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: 
"catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 2000 13 | FPN_POST_NMS_TOP_N_TEST: 2000 14 | DATASETS: 15 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 16 | TEST: ("coco_2014_minival",) 17 | DATALOADER: 18 | SIZE_DIVISIBILITY: 32 19 | SOLVER: 20 | BASE_LR: 0.02 21 | WEIGHT_DECAY: 0.0001 22 | STEPS: (60000, 80000) 23 | MAX_ITER: 90000 24 | -------------------------------------------------------------------------------- /detection/configs/rpn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 2000 13 | FPN_POST_NMS_TOP_N_TEST: 2000 14 | RESNETS: 15 | STRIDE_IN_1X1: False 16 | NUM_GROUPS: 32 17 | WIDTH_PER_GROUP: 8 18 | DATASETS: 19 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 20 | TEST: ("coco_2014_minival",) 21 | DATALOADER: 22 | SIZE_DIVISIBILITY: 32 23 | SOLVER: 24 | BASE_LR: 0.02 25 | WEIGHT_DECAY: 0.0001 26 | STEPS: (60000, 80000) 27 | MAX_ITER: 90000 28 | -------------------------------------------------------------------------------- /detection/configs/test_time_aug/e2e_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | 
PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 34 | TEST: ("coco_2014_minival",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | TEST: 43 | BBOX_AUG: 44 | ENABLED: True 45 | H_FLIP: True 46 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 47 | MAX_SIZE: 2000 48 | SCALE_H_FLIP: True 49 | -------------------------------------------------------------------------------- /detection/demo/README.md: -------------------------------------------------------------------------------- 1 | ## Webcam and Jupyter notebook demo 2 | 3 | This folder contains a simple webcam demo that illustrates how you can use `maskrcnn_benchmark` for inference. 
4 | 5 | 6 | ### With your preferred environment 7 | 8 | You can start it by running it from this folder, using one of the following commands: 9 | ```bash 10 | # by default, it runs on the GPU 11 | # for best results, use min-image-size 800 12 | python webcam.py --min-image-size 800 13 | # can also run it on the CPU 14 | python webcam.py --min-image-size 300 MODEL.DEVICE cpu 15 | # or change the model that you want to use 16 | python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu 17 | # in order to see the probability heatmaps, pass --show-mask-heatmaps 18 | python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu 19 | ``` 20 | 21 | ### With Docker 22 | 23 | Build the image with the tag `maskrcnn-benchmark` (check [INSTALL.md](../INSTALL.md) for instructions) 24 | 25 | Adjust permissions of the X server host (be careful with this step: `xhost +` disables X access control for every client; refer to 26 | [here](http://wiki.ros.org/docker/Tutorials/GUI) for alternatives) 27 | 28 | ```bash 29 | xhost + 30 | ``` 31 | 32 | Then run a container with the demo: 33 | 34 | ```bash 35 | docker run --rm -it \ 36 | -e DISPLAY=${DISPLAY} \ 37 | --privileged \ 38 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 39 | --device=/dev/video0:/dev/video0 \ 40 | --ipc=host maskrcnn-benchmark \ 41 | python demo/webcam.py --min-image-size 300 42 | ``` 43 | 44 | **DISCLAIMER:** *This was tested on an Ubuntu 16.04 machine; 45 | the volume mapping may vary depending on your platform.* 46 | -------------------------------------------------------------------------------- /detection/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SNR/f3d51b5e3525fe5e1ea364fafdf0e4cc60b1362b/detection/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png -------------------------------------------------------------------------------- /detection/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png:
def main():
    """Run the detection demo on frames grabbed from the default webcam.

    Parses the command line, merges the chosen config file and any trailing
    ``KEY VALUE`` overrides into the global ``cfg``, builds a ``COCODemo``
    predictor and then shows the annotated frames in an OpenCV window until
    ESC is pressed or the camera stops delivering frames.
    """
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.7,
        help="Minimum score for the prediction to be shown",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=224,
        help="Smallest size of the image to feed to the model. "
            "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    parser.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=args.confidence_threshold,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    cam = cv2.VideoCapture(0)
    try:
        while True:
            start_time = time.time()
            ret_val, img = cam.read()
            if not ret_val:
                # read() signals failure through its first return value; the
                # frame is then unusable and must not reach the predictor.
                print("Could not read a frame from the camera, stopping")
                break
            composite = coco_demo.run_on_opencv_image(img)
            print("Time: {:.2f} s / img".format(time.time() - start_time))
            cv2.imshow("COCO detections", composite)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
    finally:
        # always hand the device back, even if inference raised
        cam.release()
        cv2.destroyAllWindows()
# Image that serves Jupyter notebooks with maskrcnn-benchmark installed.
# CUDA / CUDNN select the tag of the nvidia/cuda base image and can be
# overridden with --build-arg.
ARG CUDA="9.0"
ARG CUDNN="7"

FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04

# keep debconf from prompting during package installation
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections

# install basics
RUN apt-get update -y \
 && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++

# Install Miniconda
RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
 && chmod +x /miniconda.sh \
 && /miniconda.sh -b -p /miniconda \
 && rm /miniconda.sh

ENV PATH=/miniconda/bin:$PATH

# Create a Python 3.6 environment
RUN /miniconda/bin/conda install -y conda-build \
 && /miniconda/bin/conda create -y --name py36 python=3.6.7 \
 && /miniconda/bin/conda clean -ya

# Put the py36 environment first on PATH so later python/pip/conda calls use it
ENV CONDA_DEFAULT_ENV=py36
ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV
ENV PATH=$CONDA_PREFIX/bin:$PATH
ENV CONDA_AUTO_UPDATE_CONDA=false

RUN conda install -y ipython
RUN pip install ninja yacs cython matplotlib jupyter

# Install PyTorch 1.0 Nightly and OpenCV
RUN conda install -y pytorch-nightly -c pytorch \
 && conda install -y opencv -c menpo \
 && conda clean -ya

WORKDIR /root

USER root

RUN mkdir /notebooks

# everything below is cloned and built inside /notebooks
WORKDIR /notebooks

# Install TorchVision master
RUN git clone https://github.com/pytorch/vision.git \
 && cd vision \
 && python setup.py install

# install pycocotools
RUN git clone https://github.com/cocodataset/cocoapi.git \
 && cd cocoapi/PythonAPI \
 && python setup.py build_ext install

# install PyTorch Detection
RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \
 && cd maskrcnn-benchmark \
 && python setup.py build develop

RUN jupyter notebook --generate-config

# replace the generated config with the one shipped next to this Dockerfile
ENV CONFIG_PATH="/root/.jupyter/jupyter_notebook_config.py"

COPY "jupyter_notebook_config.py" ${CONFIG_PATH}

# sh -c is used so that ${CONFIG_PATH} is expanded when the container starts
ENTRYPOINT ["sh", "-c", "jupyter notebook --allow-root -y --no-browser --ip=0.0.0.0 --config=${CONFIG_PATH}"]
import os
from IPython.lib import passwd

# NOTE(review): `get_config` is not defined in this file; presumably Jupyter
# injects it when it executes the config -- confirm.
c = get_config()
c.NotebookApp.ip = '0.0.0.0'
c.NotebookApp.port = int(os.getenv('PORT', 8888))
c.NotebookApp.open_browser = False

# Authentication is driven by the PASSWORD environment variable:
#   unset     -> password/token settings are left untouched
#   non-empty -> the hashed value becomes the notebook password
#   empty     -> password AND token are cleared
# NOTE(review): the empty case removes both credentials while the server
# listens on 0.0.0.0 -- confirm this is intended.
_requested = os.environ.get('PASSWORD')
if _requested is not None:
    if not _requested:
        c.NotebookApp.password = ''
        c.NotebookApp.token = ''
    else:
        c.NotebookApp.password = passwd(_requested)
    # do not leave the clear-text value in the environment
    del os.environ['PASSWORD']
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /detection/maskrcnn_benchmark/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /detection/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not 
compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /detection/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto 
x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /detection/maskrcnn_benchmark/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /detection/maskrcnn_benchmark/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor SigmoidFocalLoss_forward_cuda( 7 | const at::Tensor& logits, 8 | const at::Tensor& targets, 9 | const int num_classes, 10 | const float gamma, 11 | const float alpha); 12 | 13 | at::Tensor SigmoidFocalLoss_backward_cuda( 14 | const at::Tensor& logits, 15 | const at::Tensor& targets, 16 | const at::Tensor& d_losses, 17 | const int num_classes, 18 | const float gamma, 19 | const float alpha); 20 | 21 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 22 | const at::Tensor& rois, 23 | const float spatial_scale, 24 | const int pooled_height, 25 | const int pooled_width, 26 | const int sampling_ratio); 27 | 28 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 29 | const at::Tensor& rois, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width, 37 | const int sampling_ratio); 38 | 39 | 40 | std::tuple ROIPool_forward_cuda(const at::Tensor& input, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width); 45 | 46 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 47 | const at::Tensor& input, 48 | const at::Tensor& rois, 49 | const at::Tensor& argmax, 50 | 
const float spatial_scale, 51 | const int pooled_height, 52 | const int pooled_width, 53 | const int batch_size, 54 | const int channels, 55 | const int height, 56 | const int width); 57 | 58 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 59 | 60 | 61 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 62 | const int height, 63 | const int width); 64 | -------------------------------------------------------------------------------- /detection/maskrcnn_benchmark/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | 
const int sample_per_part, 56 | const float trans_std) 57 | { 58 | if (input.type().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | -------------------------------------------------------------------------------- /detection/maskrcnn_benchmark/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /detection/maskrcnn_benchmark/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
// Python bindings of the extension module. Each m.def() call exposes one of
// the dispatch functions from the headers included above under the given
// Python name; the third argument is the docstring shown in Python.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("nms", &nms, "non-maximum suppression");
  m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
  m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
  m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward");
  m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward");
  m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward");
  m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward");
}
class AbstractDataset(torch.utils.data.Dataset):
    """
    Common base class for maskrcnn_benchmark datasets.

    Subclasses are expected to provide:
        CLASSES - list/tuple of class name strings whose 0th element is
            "__background__" (required by initMaps).
        __getitem__(idx) - returns (img, target, idx): the input image as a
            PIL Image, the target (usually a BoxList with extra fields such
            as contiguous labels and masks) and the index that was asked for.
        __len__() - number of samples in the dataset.
        get_img_info(idx) - metadata of sample idx, at least the width and
            height of the image.
    """

    def __init__(self, *args, **kwargs):
        # both mappings stay unset until initMaps() is called
        self.name_to_id = self.id_to_name = None

    def __getitem__(self, idx):
        raise NotImplementedError

    def initMaps(self):
        """
        Optionally build the default class-name <-> index mappings.

        The index of a class is its position in CLASSES, so the background
        class always maps to 0 ("__background__" <==> 0). These indices are
        the ones used directly by the ROI heads; if the maps are filled in by
        hand instead, double check that the indexing is correct.
        """
        names = self.CLASSES
        assert isinstance(names, (list, tuple))
        assert names[0] == "__background__"
        self.id_to_name = dict(enumerate(names))
        self.name_to_id = {
            name: index for index, name in self.id_to_name.items()
        }

    def get_img_info(self, index):
        raise NotImplementedError

    def __len__(self):
        raise NotImplementedError
class ConcatDataset(_ConcatDataset):
    """
    torch.utils.data.dataset.ConcatDataset with one addition: image metadata
    can be queried through the concatenated index.
    """

    def get_idxs(self, idx):
        """Translate a global index into (dataset index, index inside it)."""
        which = bisect.bisect_right(self.cumulative_sizes, idx)
        # number of samples held by all datasets in front of the selected one
        preceding = self.cumulative_sizes[which - 1] if which != 0 else 0
        return which, idx - preceding

    def get_img_info(self, idx):
        """Forward to get_img_info of the dataset that owns sample `idx`."""
        which, local_idx = self.get_idxs(idx)
        owner = self.datasets[which]
        return owner.get_img_info(local_idx)
def voc_evaluation(dataset, predictions, output_folder, box_only, **_):
    """Run the Pascal VOC evaluation and return its result.

    `box_only` is not supported and only triggers a warning; any further
    keyword arguments (e.g. iou_types) are accepted and ignored.
    """
    log = logging.getLogger("maskrcnn_benchmark.inference")
    if box_only:
        log.warning("voc evaluation doesn't support box_only, ignored.")
    log.info("performing voc evaluation, ignored iou_types.")
    voc_kwargs = {
        "dataset": dataset,
        "predictions": predictions,
        "output_folder": output_folder,
        "logger": log,
    }
    return do_voc_evaluation(**voc_kwargs)
class ListDataset(object):
    """Dataset over a plain list of image paths.

    Arguments:
        image_lists: sequence of paths that PIL can open.
        transforms (optional): callable taking and returning (img, target).
    """

    def __init__(self, image_lists, transforms=None):
        self.image_lists = image_lists
        self.transforms = transforms

    def __getitem__(self, item):
        # convert() produces an independent image, so the file can be closed
        # as soon as the block exits
        with Image.open(self.image_lists[item]) as source:
            img = source.convert("RGB")

        # dummy target
        w, h = img.size
        target = BoxList([[0, 0, w, h]], img.size, mode="xyxy")

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.image_lists)

    def get_img_info(self, item):
        """
        Return the image dimensions for the image, without
        loading and pre-processing it

        Returns:
            dict with the keys "height" and "width" (in pixels).
        """
        # Image.open only identifies the file; the size is available without
        # decoding the pixel data.
        with Image.open(self.image_lists[item]) as img:
            width, height = img.size
        return {"height": height, "width": width}
class DistributedSampler(Sampler):
    """Sampler that restricts data loading to a subset of the dataset.
    It is especially useful in conjunction with
    :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
    process can pass a DistributedSampler instance as a DataLoader sampler,
    and load a subset of the original dataset that is exclusive to it.
    .. note::
        Dataset is assumed to be of constant size.
    Arguments:
        dataset: Dataset used for sampling.
        num_replicas (optional): Number of processes participating in
            distributed training. Defaults to the world size.
        rank (optional): Rank of the current process within num_replicas.
            Defaults to the rank of the current process.
        shuffle (optional): If True (default) the indices are shuffled with
            a permutation seeded by the epoch set through ``set_epoch``.
    Raises:
        RuntimeError: if num_replicas or rank must be looked up but the
            distributed package is not available.
    """

    def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
        if num_replicas is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            num_replicas = dist.get_world_size()
        if rank is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            rank = dist.get_rank()
        self.dataset = dataset
        self.num_replicas = num_replicas
        self.rank = rank
        self.epoch = 0
        # every replica draws the same number of samples (rounded up)
        self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
        self.total_size = self.num_samples * self.num_replicas
        self.shuffle = shuffle

    def __iter__(self):
        if self.shuffle:
            # deterministically shuffle based on epoch
            g = torch.Generator()
            g.manual_seed(self.epoch)
            indices = torch.randperm(len(self.dataset), generator=g).tolist()
        else:
            indices = torch.arange(len(self.dataset)).tolist()

        # add extra samples to make it evenly divisible
        padding = self.total_size - len(indices)
        if padding > 0:
            # A single wrap-around is not enough when the dataset is smaller
            # than the padding (e.g. 1 sample, 4 replicas), so repeat the
            # index list as often as needed before cutting it to size.
            repeats = int(math.ceil(padding * 1.0 / len(indices)))
            indices += (indices * repeats)[:padding]
        assert len(indices) == self.total_size

        # subsample
        offset = self.num_samples * self.rank
        indices = indices[offset : offset + self.num_samples]
        assert len(indices) == self.num_samples

        return iter(indices)

    def __len__(self):
        return self.num_samples

    def set_epoch(self, epoch):
        """Set the epoch used to seed the shuffling permutation."""
        self.epoch = epoch
class IterationBasedBatchSampler(BatchSampler):
    """
    Wraps a BatchSampler, resampling from it until
    a specified number of iterations have been sampled

    Arguments:
        batch_sampler: the batch sampler to draw batches from.
        num_iterations: iteration count at which sampling stops.
        start_iter (optional): iteration to start counting from, so that
            only ``num_iterations - start_iter`` batches are produced.
    """

    def __init__(self, batch_sampler, num_iterations, start_iter=0):
        self.batch_sampler = batch_sampler
        self.num_iterations = num_iterations
        self.start_iter = start_iter

    def __iter__(self):
        iteration = self.start_iter
        while iteration <= self.num_iterations:
            # if the underlying sampler has a set_epoch method, like
            # DistributedSampler, used for making each process see
            # a different split of the dataset, then set it
            if hasattr(self.batch_sampler.sampler, "set_epoch"):
                self.batch_sampler.sampler.set_epoch(iteration)
            produced_any = False
            for batch in self.batch_sampler:
                produced_any = True
                iteration += 1
                if iteration > self.num_iterations:
                    break
                yield batch
            if not produced_any:
                # An empty batch sampler never advances `iteration`; stop
                # instead of looping forever.
                return

    def __len__(self):
        return self.num_iterations
def build_transforms(cfg, is_train=True):
    """Build the image/target transform pipeline described by ``cfg.INPUT``.

    The pipeline is always resize -> random horizontal flip -> to-tensor ->
    normalize. Only the resize bounds and the flip probability depend on
    ``is_train``.
    """
    if is_train:
        min_size, max_size = cfg.INPUT.MIN_SIZE_TRAIN, cfg.INPUT.MAX_SIZE_TRAIN
        # Fixed value; cfg.INPUT.FLIP_PROB_TRAIN is not consulted here.
        flip_prob = 0.5
    else:
        min_size, max_size = cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST
        # A probability of 0 disables flipping outside training.
        flip_prob = 0

    to_bgr255 = cfg.INPUT.TO_BGR255
    normalize_transform = T.Normalize(
        mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255
    )

    steps = [
        T.Resize(min_size, max_size),
        T.RandomHorizontalFlip(flip_prob),
        T.ToTensor(),
        normalize_transform,
    ]
    return T.Compose(steps)
2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d 5 | from .misc import Conv2d 6 | from .misc import ConvTranspose2d 7 | from .misc import interpolate 8 | from .nms import nms 9 | from .roi_align import ROIAlign 10 | from .roi_align import roi_align 11 | from .roi_pool import ROIPool 12 | from .roi_pool import roi_pool 13 | from .smooth_l1_loss import smooth_l1_loss 14 | from .sigmoid_focal_loss import SigmoidFocalLoss 15 | from .gradient_scalar_layer import GradientScalarLayer 16 | from .consistency_loss import consistency_loss 17 | 18 | __all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", 19 | "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate", 20 | "FrozenBatchNorm2d", "SigmoidFocalLoss", "GradientScalarLayer", 21 | "consistency_loss" 22 | ] 23 | 24 | -------------------------------------------------------------------------------- /detection/maskrcnn_benchmark/layers/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
def _load_C_extensions():
    """JIT-compile and load the C++/CUDA sources found under ``../csrc``.

    CUDA sources and the ``-DWITH_CUDA`` flag are only added when CUDA is
    available to torch and ``CUDA_HOME`` is set. Returns whatever
    ``torch.utils.cpp_extension.load`` returns for the built extension.
    """
    package_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    csrc_dir = os.path.join(package_dir, "csrc")

    def find(*parts):
        # glob relative to the csrc directory
        return glob.glob(os.path.join(csrc_dir, *parts))

    sources = find("*.cpp") + find("cpu", "*.cpp")
    cuda_sources = find("cuda", "*.cu")

    with_cuda = torch.cuda.is_available() and CUDA_HOME is not None
    if with_cuda:
        sources.extend(cuda_sources)
    cflags = ["-DWITH_CUDA"] if with_cuda else []

    sources = [os.path.join(csrc_dir, path) for path in sources]
    return load_ext(
        "torchvision",
        sources,
        extra_cflags=cflags,
        extra_include_paths=[csrc_dir],
    )
class FrozenBatchNorm2d(nn.Module):
    """
    BatchNorm2d whose statistics and affine parameters are constants.

    All four tensors are registered as buffers (not parameters), so they are
    saved in the state dict but never returned by ``parameters()``.
    """

    def __init__(self, n):
        super(FrozenBatchNorm2d, self).__init__()
        # Registration order is kept stable because it defines state_dict order.
        initial_buffers = (
            ("weight", torch.ones),
            ("bias", torch.zeros),
            ("running_mean", torch.zeros),
            ("running_var", torch.ones),
        )
        for buffer_name, make in initial_buffers:
            self.register_buffer(buffer_name, make(n))

    def forward(self, x):
        # Fold normalization and affine transform into one per-channel
        # multiply-add: y = x * scale + shift.
        scale = self.weight * self.running_var.rsqrt()
        shift = self.bias - self.running_mean * scale
        # Broadcast the per-channel vectors over dim 1 of x.
        return x * scale.reshape(1, -1, 1, 1) + shift.reshape(1, -1, 1, 1)
class _GradientScalarLayer(torch.autograd.Function):
    """Identity in the forward pass; multiplies the gradient by ``weight``
    in the backward pass."""

    @staticmethod
    def forward(ctx, input, weight):
        # Remember the multiplier for the backward pass.
        ctx.weight = weight
        return input.view_as(input)

    @staticmethod
    def backward(ctx, grad_output):
        scaled = ctx.weight * grad_output.clone()
        # No gradient is produced for ``weight``.
        return scaled, None


gradient_scalar = _GradientScalarLayer.apply


class GradientScalarLayer(torch.nn.Module):
    """Module wrapper around ``gradient_scalar`` with a fixed ``weight``."""

    def __init__(self, weight):
        super(GradientScalarLayer, self).__init__()
        self.weight = weight

    def forward(self, input):
        return gradient_scalar(input, self.weight)

    def __repr__(self):
        return "{}(weight={})".format(self.__class__.__name__, str(self.weight))
class _ROIAlign(Function):
    """Autograd wrapper around the compiled ``roi_align_forward`` /
    ``roi_align_backward`` ops from ``_C``."""

    @staticmethod
    def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio):
        """Run the compiled forward op and stash what backward needs.

        ``output_size`` may be an int or a pair; it is normalised with
        ``_pair`` before use. ``spatial_scale`` and ``sampling_ratio`` are
        forwarded unchanged to the compiled op.
        """
        # Normalise once and use the same pair for both the op call and ctx;
        # indexing the raw argument would fail for an int output_size.
        output_size = _pair(output_size)
        ctx.save_for_backward(roi)
        ctx.output_size = output_size
        ctx.spatial_scale = spatial_scale
        ctx.sampling_ratio = sampling_ratio
        ctx.input_shape = input.size()
        output = _C.roi_align_forward(
            input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio
        )
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input`` only; the other four forward
        arguments receive ``None``."""
        rois, = ctx.saved_tensors
        output_size = ctx.output_size
        spatial_scale = ctx.spatial_scale
        sampling_ratio = ctx.sampling_ratio
        # input_shape is unpacked as four dims, i.e. a 4-D input is expected.
        bs, ch, h, w = ctx.input_shape
        grad_input = _C.roi_align_backward(
            grad_output,
            rois,
            spatial_scale,
            output_size[0],
            output_size[1],
            bs,
            ch,
            h,
            w,
            sampling_ratio,
        )
        return grad_input, None, None, None, None


roi_align = _ROIAlign.apply
class _ROIPool(Function):
    """Autograd wrapper around the compiled ``roi_pool_forward`` /
    ``roi_pool_backward`` ops from ``_C``."""

    @staticmethod
    def forward(ctx, input, roi, output_size, spatial_scale):
        """Run the compiled forward op and stash what backward needs.

        ``output_size`` may be an int or a pair; it is normalised with
        ``_pair`` before use.
        """
        # Normalise once and use the same pair for both the op call and ctx;
        # indexing the raw argument would fail for an int output_size.
        output_size = _pair(output_size)
        ctx.output_size = output_size
        ctx.spatial_scale = spatial_scale
        ctx.input_shape = input.size()
        output, argmax = _C.roi_pool_forward(
            input, roi, spatial_scale, output_size[0], output_size[1]
        )
        # argmax is produced by the forward op and consumed by backward.
        ctx.save_for_backward(input, roi, argmax)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input`` only; the other three forward
        arguments receive ``None``."""
        input, rois, argmax = ctx.saved_tensors
        output_size = ctx.output_size
        spatial_scale = ctx.spatial_scale
        # input_shape is unpacked as four dims, i.e. a 4-D input is expected.
        bs, ch, h, w = ctx.input_shape
        grad_input = _C.roi_pool_backward(
            grad_output,
            input,
            rois,
            argmax,
            spatial_scale,
            output_size[0],
            output_size[1],
            bs,
            ch,
            h,
            w,
        )
        return grad_input, None, None, None


roi_pool = _ROIPool.apply
# TODO: Use JIT to replace CUDA implementation in the future.
class _SigmoidFocalLoss(Function):
    """Autograd wrapper around the compiled ``sigmoid_focalloss_forward`` /
    ``sigmoid_focalloss_backward`` ops from ``_C``."""

    @staticmethod
    def forward(ctx, logits, targets, gamma, alpha):
        ctx.save_for_backward(logits, targets)
        num_classes = logits.shape[1]
        ctx.num_classes = num_classes
        ctx.gamma = gamma
        ctx.alpha = alpha

        losses = _C.sigmoid_focalloss_forward(
            logits, targets, num_classes, gamma, alpha
        )
        return losses

    @staticmethod
    @once_differentiable
    def backward(ctx, d_loss):
        logits, targets = ctx.saved_tensors
        num_classes = ctx.num_classes
        gamma = ctx.gamma
        alpha = ctx.alpha
        d_loss = d_loss.contiguous()
        d_logits = _C.sigmoid_focalloss_backward(
            logits, targets, d_loss, num_classes, gamma, alpha
        )
        # One gradient slot per forward input: logits, targets, gamma, alpha.
        return d_logits, None, None, None


sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply


def _first_element_or_scalar(value):
    """Return ``value[0]`` for indexable inputs, ``value`` itself otherwise."""
    try:
        return value[0]
    except (TypeError, IndexError):
        # Plain Python numbers raise TypeError, 0-dim tensors IndexError.
        return value


def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha):
    """Pure-PyTorch sigmoid focal loss, returned per element (not reduced).

    Arguments:
        logits: tensor whose dim 1 indexes classes.
        targets: integer class ids, one per row of ``logits``. Class ``c``
            (1-based) is positive for column ``c - 1``; 0 marks a row as
            negative for every class; negative ids contribute zero loss.
        gamma, alpha: focal-loss hyper-parameters, given either as plain
            scalars or as indexable 1-element containers/tensors.
    """
    num_classes = logits.shape[1]
    # Accept both scalars and 1-element sequences; indexing a bare float
    # unconditionally would raise TypeError.
    gamma = _first_element_or_scalar(gamma)
    alpha = _first_element_or_scalar(alpha)
    dtype = targets.dtype
    device = targets.device
    class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0)

    t = targets.unsqueeze(1)
    p = torch.sigmoid(logits)
    term1 = (1 - p) ** gamma * torch.log(p)
    term2 = p ** gamma * torch.log(1 - p)
    positive = (t == class_range).float()
    # Rows with a negative target are excluded from the negative term too.
    negative = ((t != class_range) & (t >= 0)).float()
    return -positive * term1 * alpha - negative * term2 * (1 - alpha)


class SigmoidFocalLoss(nn.Module):
    """Sigmoid focal loss summed over all elements.

    Dispatches to the compiled op for CUDA logits and to
    ``sigmoid_focal_loss_cpu`` otherwise.
    """

    def __init__(self, gamma, alpha):
        super(SigmoidFocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, logits, targets):
        if logits.is_cuda:
            loss_func = sigmoid_focal_loss_cuda
        else:
            loss_func = sigmoid_focal_loss_cpu

        loss = loss_func(logits, targets, self.gamma, self.alpha)
        return loss.sum()

    def __repr__(self):
        tmpstr = self.__class__.__name__ + "("
        tmpstr += "gamma=" + str(self.gamma)
        tmpstr += ", alpha=" + str(self.alpha)
        tmpstr += ")"
        return tmpstr
def build_detection_model(cfg):
    """Instantiate the detector registered under ``cfg.MODEL.META_ARCHITECTURE``.

    Raises KeyError when the name is not in ``_DETECTION_META_ARCHITECTURES``.
    """
    arch_name = cfg.MODEL.META_ARCHITECTURE
    detector_cls = _DETECTION_META_ARCHITECTURES[arch_name]
    return detector_cls(cfg)
class GeneralizedRCNN(nn.Module):
    """
    Main class for Generalized R-CNN. Currently supports boxes and masks.
    It is assembled from four parts built from the config:
    - backbone
    - rpn
    - roi heads: take the features + the proposals from the RPN and compute
        detections / masks from them.
    - da heads: consume the features and the instance-level outputs of the
        roi heads and return additional (domain-adaptation) losses.
    """

    def __init__(self, cfg):
        super(GeneralizedRCNN, self).__init__()

        self.backbone = build_backbone(cfg)
        self.rpn = build_rpn(cfg)
        self.roi_heads = build_roi_heads(cfg)
        self.da_heads = build_da_heads(cfg)

    def forward(self, images, targets=None):
        """
        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        Raises:
            ValueError: in training mode when ``targets`` is None.
        """
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")

        images = to_image_list(images)
        features = self.backbone(images.tensors)
        proposals, proposal_losses = self.rpn(images, features, targets)

        # Defaults cover RPN-only models, which don't have roi_heads: the
        # proposals are the result and there are no detector / DA losses.
        result = proposals
        detector_losses = {}
        da_losses = {}
        if self.roi_heads:
            _, result, detector_losses, da_ins_feas, da_ins_labels = self.roi_heads(
                features, proposals, targets
            )
            if self.da_heads:
                da_losses = self.da_heads(features, da_ins_feas, da_ins_labels, targets)

        if not self.training:
            return result

        # Merge order matters only if keys collide: later updates win.
        losses = {}
        for partial in (detector_losses, proposal_losses, da_losses):
            losses.update(partial)
        return losses
@registry.ROI_BOX_PREDICTOR.register("FastRCNNPredictor")
class FastRCNNPredictor(nn.Module):
    """Box predictor: 7x7 average pooling followed by two linear layers that
    produce class logits and box regression outputs."""

    def __init__(self, config, pretrained=None):
        super(FastRCNNPredictor, self).__init__()

        # Input width = RES2_OUT_CHANNELS scaled by 2 ** (stage_index - 1).
        stage_index = 4
        num_inputs = config.MODEL.RESNETS.RES2_OUT_CHANNELS * 2 ** (stage_index - 1)
        num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        # Class-agnostic regression uses 2 box slots instead of one per class.
        num_box_slots = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes

        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7)
        self.cls_score = nn.Linear(num_inputs, num_classes)
        self.bbox_pred = nn.Linear(num_inputs, num_box_slots * 4)

        # Gaussian weights (per-layer std) and zero biases.
        for layer, std in ((self.cls_score, 0.01), (self.bbox_pred, 0.001)):
            nn.init.normal_(layer.weight, mean=0, std=std)
            nn.init.constant_(layer.bias, 0)

    def forward(self, x):
        pooled = self.avgpool(x)
        flat = pooled.view(pooled.size(0), -1)
        return self.cls_score(flat), self.bbox_pred(flat)
def make_optimizer(cfg, model):
    """Build an SGD optimizer with one parameter group per trainable parameter.

    Parameters whose name contains "bias" use
    ``BASE_LR * BIAS_LR_FACTOR`` and ``WEIGHT_DECAY_BIAS``; all others use
    ``BASE_LR`` and ``WEIGHT_DECAY``. Parameters with ``requires_grad=False``
    are skipped.

    Arguments:
        cfg: config object exposing ``cfg.SOLVER.*``.
        model: module whose ``named_parameters()`` are optimized.

    Returns:
        torch.optim.SGD configured with ``cfg.SOLVER.MOMENTUM``.
    """
    base_lr = cfg.SOLVER.BASE_LR
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        if "bias" in key:
            lr = base_lr * cfg.SOLVER.BIAS_LR_FACTOR
            weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
        else:
            lr = base_lr
            weight_decay = cfg.SOLVER.WEIGHT_DECAY
        params.append({"params": [value], "lr": lr, "weight_decay": weight_decay})

    # The optimizer-wide default lr is the configured base lr, not whatever
    # value the loop variable happened to hold for the last parameter (which
    # is also undefined when nothing is trainable).
    optimizer = torch.optim.SGD(params, base_lr, momentum=cfg.SOLVER.MOMENTUM)
    return optimizer
# FIXME ideally this would be achieved with a CombinedLRScheduler,
# separating MultiStepLR with WarmupLR
# but the current LRScheduler design doesn't allow it
class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    """Multi-step learning-rate decay preceded by a warmup phase.

    While ``last_epoch < warmup_iters`` the base learning rate is multiplied
    by a warmup factor: the constant ``warmup_factor`` for the "constant"
    method, or a value interpolated linearly from ``warmup_factor`` towards 1
    for the "linear" method. Independently, the rate is multiplied by
    ``gamma`` once for every milestone that is <= ``last_epoch``.

    Raises:
        ValueError: if ``milestones`` is not sorted in increasing order or
            ``warmup_method`` is not "constant" or "linear".
    """

    def __init__(
        self,
        optimizer,
        milestones,
        gamma=0.1,
        warmup_factor=1.0 / 3,
        warmup_iters=500,
        warmup_method="linear",
        last_epoch=-1,
    ):
        if not list(milestones) == sorted(milestones):
            raise ValueError(
                "Milestones should be a list of increasing integers. "
                "Got {}".format(milestones)
            )

        if warmup_method not in ("constant", "linear"):
            raise ValueError(
                "Only 'constant' or 'linear' warmup_method accepted, "
                "got {}".format(warmup_method)
            )
        # Attributes must exist before the base __init__ runs, because it
        # already calls get_lr().
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the learning rate of every param group for ``last_epoch``."""
        warmup_factor = 1
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == "constant":
                warmup_factor = self.warmup_factor
            elif self.warmup_method == "linear":
                # alpha runs from 0 to 1 over the warmup window.
                alpha = float(self.last_epoch) / self.warmup_iters
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
        # bisect_right counts the milestones already reached.
        decay = self.gamma ** bisect_right(self.milestones, self.last_epoch)
        return [base_lr * warmup_factor * decay for base_lr in self.base_lrs]
def findContours(*args, **kwargs):
    """
    Wraps cv2.findContours to maintain compatibility between OpenCV versions.

    OpenCV 3.x returns ``(image, contours, hierarchy)`` while other major
    versions return ``(contours, hierarchy)``. In both layouts the last two
    items are the contours and the hierarchy, so they are taken from the end
    of the returned tuple instead of matching on the version string.

    Returns:
        contours, hierarchy
    """
    result = cv2.findContours(*args, **kwargs)
    contours, hierarchy = result[-2:]
    return contours, hierarchy
def setup_environment():
    """Run optional, user-supplied environment setup.

    When the ``TORCH_DETECTRON_ENV_MODULE`` environment variable holds a
    non-empty path to a Python source file, that file is loaded and its
    ``setup_environment`` function is called. Otherwise nothing happens.
    """
    custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE")
    if not custom_module_path:
        # The default setup is a no-op
        return
    setup_custom_environment(custom_module_path)


def setup_custom_environment(custom_module_path):
    """Load the Python source file at ``custom_module_path`` and call its
    ``setup_environment`` function.

    Fails an assertion when the loaded module has no callable
    ``setup_environment`` attribute.
    """
    module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path)
    hook = getattr(module, "setup_environment", None)
    assert callable(hook), (
        "Custom environment module defined in {} does not have the "
        "required callable attribute 'setup_environment'."
    ).format(custom_module_path)
    hook()
import sys

# The interpreter version is checked directly rather than through the private
# ``torch._six`` helper, which newer PyTorch releases no longer provide.
if sys.version_info[0] >= 3:
    import importlib
    import importlib.util

    # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
    def import_file(module_name, file_path, make_importable=False):
        """Load the Python source file at ``file_path`` as module ``module_name``.

        Arguments:
            module_name (str): name given to the loaded module.
            file_path (str): path of the source file to execute.
            make_importable (bool): when true, also register the module in
                ``sys.modules`` under ``module_name``.

        Returns:
            the loaded module object.

        Raises:
            ImportError: if no import spec/loader can be built for ``file_path``.
        """
        spec = importlib.util.spec_from_file_location(module_name, file_path)
        if spec is None or spec.loader is None:
            # spec_from_file_location returns None when it cannot pick a
            # loader for the path (e.g. unknown file suffix).
            raise ImportError(
                "Cannot load module {!r} from {!r}".format(module_name, file_path)
            )
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        if make_importable:
            sys.modules[module_name] = module
        return module
else:
    import imp

    def import_file(module_name, file_path, make_importable=None):
        """Load the Python source file at ``file_path`` as module ``module_name``.

        ``make_importable`` is accepted for signature parity and not used.
        """
        module = imp.load_source(module_name, file_path)
        return module
def setup_logger(name, save_dir, distributed_rank, filename="log.txt"):
    """Return the logger ``name`` set to DEBUG level.

    For ``distributed_rank > 0`` no handlers are attached. Otherwise a stdout
    stream handler is added and, when ``save_dir`` is truthy, a file handler
    writing to ``save_dir/filename``; both use the same format and DEBUG level.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    # don't log results for the non-master process
    if distributed_rank > 0:
        return logger

    formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s")

    def attach(handler):
        # Shared configuration for every handler added below.
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    attach(logging.StreamHandler(stream=sys.stdout))
    if save_dir:
        attach(logging.FileHandler(os.path.join(save_dir, filename)))

    return logger
class MetricLogger(object):
    """Collection of named meters, created on first use.

    Meters are stored in ``self.meters`` (a ``defaultdict(SmoothedValue)``)
    and are also reachable as attributes, e.g. ``logger.loss``.
    """

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one value per keyword into the meter of the same name.

        Tensor values are converted with ``.item()``; every value must end up
        as a float or int.
        """
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        # __getattr__ only runs after normal attribute lookup has failed, so
        # instance attributes never reach this point. ``meters`` is fetched
        # through __dict__ because touching ``self.meters`` here would
        # re-enter __getattr__ without bound while the attribute is not set
        # yet (e.g. during copy.copy or unpickling).
        meters = self.__dict__.get("meters")
        if meters is not None and attr in meters:
            return meters[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
                    type(self).__name__, attr))

    def __str__(self):
        """Render every meter as ``name: median (global_avg)``."""
        loss_str = [
            "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg)
            for name, meter in self.meters.items()
        ]
        return self.delimiter.join(loss_str)
def _register_generic(module_dict, module_name, module):
    '''Store ``module`` under ``module_name``; never overwrite an existing entry.'''
    if module_name in module_dict:
        # Raised explicitly (instead of a bare ``assert``) so the duplicate
        # check is not stripped when Python runs with -O. AssertionError is
        # kept so existing callers observe the same exception type.
        raise AssertionError(
            "'{}' is already registered".format(module_name))
    module_dict[module_name] = module


class Registry(dict):
    '''
    A helper class for managing registered modules. It extends a dictionary
    and provides a register function.

    E.g. creating a registry:
        some_registry = Registry({"default": default_module})

    There are two ways of registering new modules:
    1): the normal way is just calling the register function:
        def foo():
            ...
        some_registry.register("foo_module", foo)
    2): used as a decorator when declaring the module:
        @some_registry.register("foo_module")
        @some_registry.register("foo_module_nickname")
        def foo():
            ...

    Accessing a module is just like using a dictionary, e.g.:
        f = some_registry["foo_module"]

    Registering a name that is already present raises AssertionError.
    '''
    def __init__(self, *args, **kwargs):
        super(Registry, self).__init__(*args, **kwargs)

    def register(self, module_name, module=None):
        '''
        Register ``module`` under ``module_name``.

        With ``module`` given, the entry is stored and None is returned.
        Without it, a decorator is returned that stores the decorated object
        and hands it back unchanged, so decorators can be stacked.
        '''
        # used as function call
        if module is not None:
            _register_generic(self, module_name, module)
            return

        # used as decorator
        def register_fn(fn):
            _register_generic(self, module_name, fn)
            return fn

        return register_fn
class Timer(object):
    """Accumulate elapsed-time measurements and report their running average.

    Use ``tic()`` / ``toc()`` around the code to measure, or feed durations
    measured elsewhere through ``add()``. All durations are in seconds.
    """

    def __init__(self):
        self.reset()

    @property
    def average_time(self):
        """Mean of all recorded durations, or 0.0 when nothing was recorded."""
        return self.total_time / self.calls if self.calls > 0 else 0.0

    def tic(self):
        """Mark the start of a measured interval."""
        # perf_counter is a monotonic, high-resolution clock: unlike
        # time.time() it cannot jump when the system clock is adjusted, so
        # the measured interval is never negative or inflated.
        self.start_time = time.perf_counter()

    def toc(self, average=True):
        """Record the time since the last ``tic()``.

        Returns the running average when ``average`` is true, otherwise the
        duration of this single interval.
        """
        self.add(time.perf_counter() - self.start_time)
        if average:
            return self.average_time
        else:
            return self.diff

    def add(self, time_diff):
        """Record an externally measured duration ``time_diff``."""
        self.diff = time_diff
        self.total_time += self.diff
        self.calls += 1

    def reset(self):
        """Forget all measurements."""
        self.total_time = 0.0
        self.calls = 0
        self.start_time = 0.0
        self.diff = 0.0

    def avg_time_str(self):
        """Return the average duration formatted by ``datetime.timedelta``."""
        time_str = str(datetime.timedelta(seconds=self.average_time))
        return time_str
def get_extensions():
    """Describe the ``maskrcnn_benchmark._C`` native extension for ``setup()``.

    The C++ sources under ``maskrcnn_benchmark/csrc`` (top level and
    ``cpu/``) are always part of the build. The ``cuda/*.cu`` sources, the
    ``WITH_CUDA`` macro and the nvcc flags are added, and ``CUDAExtension``
    is used, only when a CUDA device is available and ``CUDA_HOME`` is set.

    Returns:
        A one-element list holding the extension object.
    """
    this_dir = os.path.dirname(os.path.abspath(__file__))
    extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc")

    # glob returns entries in filesystem-dependent order; sorting keeps the
    # source list (and therefore the build) the same on every machine.
    main_file = sorted(glob.glob(os.path.join(extensions_dir, "*.cpp")))
    source_cpu = sorted(glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")))
    source_cuda = sorted(glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")))

    sources = main_file + source_cpu
    extension = CppExtension

    extra_compile_args = {"cxx": []}
    define_macros = []

    if torch.cuda.is_available() and CUDA_HOME is not None:
        extension = CUDAExtension
        sources += source_cuda
        define_macros += [("WITH_CUDA", None)]
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]

    # The globbed paths are already absolute because extensions_dir is, so
    # joining them onto extensions_dir again would leave them unchanged.
    include_dirs = [extensions_dir]

    ext_modules = [
        extension(
            "maskrcnn_benchmark._C",
            sources,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        )
    ]

    return ext_modules
def get_config_root_path():
    ''' Return the path of the ``configs`` directory used by the unit tests '''
    # this file lives in root/tests/env_tests, so the root is reached by
    # stripping the file name and then two more directory levels
    this_file = os.path.abspath(os.path.realpath(__file__))
    root_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_file)))
    return os.path.join(root_dir, "configs")
BACKBONE_CFGS: 32 | cfg = load_config(BACKBONE_CFGS[name]) 33 | else: 34 | # Use default config if config file is not specified 35 | cfg = copy.deepcopy(g_cfg) 36 | backbone = backbone_builder(cfg) 37 | 38 | # make sures the backbone has `out_channels` 39 | self.assertIsNotNone( 40 | getattr(backbone, 'out_channels', None), 41 | 'Need to provide out_channels for backbone {}'.format(name) 42 | ) 43 | 44 | N, C_in, H, W = 2, 3, 224, 256 45 | input = torch.rand([N, C_in, H, W], dtype=torch.float32) 46 | out = backbone(input) 47 | for cur_out in out: 48 | self.assertEqual( 49 | cur_out.shape[:2], 50 | torch.Size([N, backbone.out_channels]) 51 | ) 52 | 53 | 54 | if __name__ == "__main__": 55 | unittest.main() 56 | -------------------------------------------------------------------------------- /detection/tests/test_configs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | import glob 5 | import os 6 | import utils 7 | 8 | 9 | class TestConfigs(unittest.TestCase): 10 | def test_configs_load(self): 11 | ''' Make sure configs are loadable ''' 12 | 13 | cfg_root_path = utils.get_config_root_path() 14 | files = glob.glob( 15 | os.path.join(cfg_root_path, "./**/*.yaml"), recursive=True) 16 | self.assertGreater(len(files), 0) 17 | 18 | for fn in files: 19 | print('Loading {}...'.format(fn)) 20 | utils.load_config_from_file(fn) 21 | 22 | 23 | if __name__ == "__main__": 24 | unittest.main() 25 | -------------------------------------------------------------------------------- /detection/tests/test_metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import unittest 3 | 4 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 5 | 6 | 7 | class TestMetricLogger(unittest.TestCase): 8 | def test_update(self): 9 | meter = MetricLogger() 10 | for i in range(10): 11 | meter.update(metric=float(i)) 12 | 13 | m = meter.meters["metric"] 14 | self.assertEqual(m.count, 10) 15 | self.assertEqual(m.total, 45) 16 | self.assertEqual(m.median, 4) 17 | self.assertEqual(m.avg, 4.5) 18 | 19 | def test_no_attr(self): 20 | meter = MetricLogger() 21 | _ = meter.meters 22 | _ = meter.delimiter 23 | def broken(): 24 | _ = meter.not_existent 25 | self.assertRaises(AttributeError, broken) 26 | 27 | if __name__ == "__main__": 28 | unittest.main() 29 | -------------------------------------------------------------------------------- /detection/tests/test_rpn_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | import copy 5 | import torch 6 | # import modules to to register rpn heads 7 | from maskrcnn_benchmark.modeling.backbone import build_backbone # NoQA 8 | from maskrcnn_benchmark.modeling.rpn.rpn import build_rpn # NoQA 9 | from maskrcnn_benchmark.modeling import registry 10 | from maskrcnn_benchmark.config import cfg as g_cfg 11 | from utils import load_config 12 | 13 | 14 | # overwrite configs if specified, otherwise default config is used 15 | RPN_CFGS = { 16 | } 17 | 18 | 19 | class TestRPNHeads(unittest.TestCase): 20 | def test_build_rpn_heads(self): 21 | ''' Make sure rpn heads run ''' 22 | 23 | self.assertGreater(len(registry.RPN_HEADS), 0) 24 | 25 | in_channels = 64 26 | num_anchors = 10 27 | 28 | for name, builder in registry.RPN_HEADS.items(): 29 | print('Testing {}...'.format(name)) 30 | if name in RPN_CFGS: 31 | cfg = load_config(RPN_CFGS[name]) 32 | else: 33 | # Use default config if config file is not specified 34 | cfg = copy.deepcopy(g_cfg) 35 | 36 | rpn = 
builder(cfg, in_channels, num_anchors) 37 | 38 | N, C_in, H, W = 2, in_channels, 24, 32 39 | input = torch.rand([N, C_in, H, W], dtype=torch.float32) 40 | LAYERS = 3 41 | out = rpn([input] * LAYERS) 42 | self.assertEqual(len(out), 2) 43 | logits, bbox_reg = out 44 | for idx in range(LAYERS): 45 | self.assertEqual( 46 | logits[idx].shape, 47 | torch.Size([ 48 | input.shape[0], num_anchors, 49 | input.shape[2], input.shape[3], 50 | ]) 51 | ) 52 | self.assertEqual( 53 | bbox_reg[idx].shape, 54 | torch.Size([ 55 | logits[idx].shape[0], num_anchors * 4, 56 | logits[idx].shape[2], logits[idx].shape[3], 57 | ]), 58 | ) 59 | 60 | 61 | if __name__ == "__main__": 62 | unittest.main() 63 | -------------------------------------------------------------------------------- /detection/tests/test_segmentation_mask.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import unittest 3 | import torch 4 | from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask 5 | 6 | 7 | class TestSegmentationMask(unittest.TestCase): 8 | def __init__(self, method_name='runTest'): 9 | super(TestSegmentationMask, self).__init__(method_name) 10 | poly = [[[423.0, 306.5, 406.5, 277.0, 400.0, 271.5, 389.5, 277.0, 11 | 387.5, 292.0, 384.5, 295.0, 374.5, 220.0, 378.5, 210.0, 12 | 391.0, 200.5, 404.0, 199.5, 414.0, 203.5, 425.5, 221.0, 13 | 438.5, 297.0, 423.0, 306.5], 14 | [100, 100, 200, 100, 200, 200, 100, 200], 15 | ]] 16 | width = 640 17 | height = 480 18 | size = width, height 19 | 20 | self.P = SegmentationMask(poly, size, 'poly') 21 | self.M = SegmentationMask(poly, size, 'poly').convert('mask') 22 | 23 | def L1(self, A, B): 24 | diff = A.get_mask_tensor() - B.get_mask_tensor() 25 | diff = torch.sum(torch.abs(diff.float())).item() 26 | return diff 27 | 28 | def test_convert(self): 29 | M_hat = self.M.convert('poly').convert('mask') 30 | P_hat = 
self.P.convert('mask').convert('poly') 31 | 32 | diff_mask = self.L1(self.M, M_hat) 33 | diff_poly = self.L1(self.P, P_hat) 34 | self.assertTrue(diff_mask == diff_poly) 35 | self.assertTrue(diff_mask <= 8169.) 36 | self.assertTrue(diff_poly <= 8169.) 37 | 38 | def test_crop(self): 39 | box = [400, 250, 500, 300] # xyxy 40 | diff = self.L1(self.M.crop(box), self.P.crop(box)) 41 | self.assertTrue(diff <= 1.) 42 | 43 | def test_resize(self): 44 | new_size = 50, 25 45 | M_hat = self.M.resize(new_size) 46 | P_hat = self.P.resize(new_size) 47 | diff = self.L1(M_hat, P_hat) 48 | 49 | self.assertTrue(self.M.size == self.P.size) 50 | self.assertTrue(M_hat.size == P_hat.size) 51 | self.assertTrue(self.M.size != M_hat.size) 52 | self.assertTrue(diff <= 255.) 53 | 54 | def test_transpose(self): 55 | FLIP_LEFT_RIGHT = 0 56 | FLIP_TOP_BOTTOM = 1 57 | diff_hor = self.L1(self.M.transpose(FLIP_LEFT_RIGHT), 58 | self.P.transpose(FLIP_LEFT_RIGHT)) 59 | 60 | diff_ver = self.L1(self.M.transpose(FLIP_TOP_BOTTOM), 61 | self.P.transpose(FLIP_TOP_BOTTOM)) 62 | 63 | self.assertTrue(diff_hor <= 53250.) 64 | self.assertTrue(diff_ver <= 42494.) 
65 | 66 | 67 | if __name__ == "__main__": 68 | 69 | unittest.main() 70 | -------------------------------------------------------------------------------- /detection/tests/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function, unicode_literals 2 | 3 | # Set up custom environment before nearly anything else is imported 4 | # NOTE: this should be the first import (no not reorder) 5 | from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip 6 | import env_tests.env as env_tests 7 | 8 | import os 9 | import copy 10 | 11 | from maskrcnn_benchmark.config import cfg as g_cfg 12 | 13 | 14 | def get_config_root_path(): 15 | return env_tests.get_config_root_path() 16 | 17 | 18 | def load_config(rel_path): 19 | ''' Load config from file path specified as path relative to config_root ''' 20 | cfg_path = os.path.join(env_tests.get_config_root_path(), rel_path) 21 | return load_config_from_file(cfg_path) 22 | 23 | 24 | def load_config_from_file(file_path): 25 | ''' Load config from file path specified as absolute path ''' 26 | ret = copy.deepcopy(g_cfg) 27 | ret.merge_from_file(file_path) 28 | return ret 29 | -------------------------------------------------------------------------------- /detection/tools/cityscapes/instances2dict_with_polygons.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Convert instances from png files to a dictionary 4 | # This files is created according to https://github.com/facebookresearch/Detectron/issues/111 5 | 6 | from __future__ import print_function, absolute_import, division 7 | import os, sys 8 | 9 | sys.path.append( os.path.normpath( os.path.join( os.path.dirname( __file__ ) , '..' 
def main(argv):
    """Build the instance dictionary for every PNG path found in ``argv``.

    Args:
        argv: Command-line arguments without the program name (the script
            entry point passes ``sys.argv[1:]``). Arguments that do not
            contain ``"png"`` are ignored.
    """
    # The arguments used to be inspected only when more than two were given,
    # so running the script on one or two images processed nothing.
    fileList = [arg for arg in argv if "png" in arg]
    instances2dict_with_polygons(fileList, True)
pano_00236_3_180 68 | pano_00238_2_0 69 | pano_00238_5_180 70 | pano_00239_2_0 71 | pano_00358_5_180 72 | pano_00370_0_180 73 | pano_00373_0_180 74 | pano_00376_3_0 75 | pano_00379_1_180 76 | pano_00386_0_0 77 | pano_00396_1_180 78 | pano_00399_0_180 79 | pano_00401_1_180 80 | pano_00403_3_180 81 | pano_00427_0_0 82 | pano_00430_3_0 83 | pano_00443_0_180 84 | pano_00459_0_180 85 | pano_00460_6_0 86 | pano_00531_1_0 87 | pano_00541_0_180 88 | pano_00554_0_180 89 | pano_00558_1_0 90 | pano_00584_2_0 91 | pano_00587_1_0 92 | pano_00619_1_180 93 | pano_00631_0_0 94 | pano_03575_2_0 95 | pano_03584_0_0 96 | pano_03588_0_0 97 | pano_03602_0_0 98 | pano_03603_2_180 99 | pano_03608_1_0 100 | pano_05140_2_0 101 | -------------------------------------------------------------------------------- /segmentation/datasets/NTHU_list/Rome/List/test.txt: -------------------------------------------------------------------------------- 1 | pano_00368_0_0 2 | pano_00368_0_180 3 | pano_00390_1_0 4 | pano_00412_0_0 5 | pano_00412_4_180 6 | pano_00431_1_0 7 | pano_00443_1_180 8 | pano_00474_0_0 9 | pano_00474_1_0 10 | pano_00494_0_0 11 | pano_00494_0_180 12 | pano_00498_0_0 13 | pano_00498_1_180 14 | pano_00500_0_180 15 | pano_00500_2_0 16 | pano_00507_2_180 17 | pano_00513_0_0 18 | pano_00513_1_180 19 | pano_00522_0_180 20 | pano_00537_0_180 21 | pano_00537_2_180 22 | pano_00544_1_180 23 | pano_00544_2_0 24 | pano_00545_0_0 25 | pano_00545_0_180 26 | pano_00561_0_0 27 | pano_00561_2_180 28 | pano_00562_0_0 29 | pano_00562_0_180 30 | pano_00563_0_0 31 | pano_00563_0_180 32 | pano_00564_0_0 33 | pano_00565_1_0 34 | pano_00565_3_180 35 | pano_00566_0_180 36 | pano_00566_2_0 37 | pano_00567_0_0 38 | pano_00567_2_180 39 | pano_00568_0_0 40 | pano_00568_2_180 41 | pano_00571_0_0 42 | pano_00571_2_180 43 | pano_00586_2_0 44 | pano_00586_2_180 45 | pano_00595_1_0 46 | pano_00606_1_180 47 | pano_00606_2_0 48 | pano_00608_1_0 49 | pano_00610_0_0 50 | pano_00610_2_180 51 | pano_00622_1_0 52 | 
pano_00651_0_0 53 | pano_00922_1_0 54 | pano_00922_1_180 55 | pano_00933_0_0 56 | pano_00937_0_0 57 | pano_00937_0_180 58 | pano_00959_0_0 59 | pano_00967_0_180 60 | pano_00967_4_0 61 | pano_00986_1_180 62 | pano_00996_0_180 63 | pano_00998_0_0 64 | pano_01001_0_0 65 | pano_01005_0_0 66 | pano_01006_4_180 67 | pano_01025_1_0 68 | pano_01027_2_180 69 | pano_01027_5_0 70 | pano_01034_0_0 71 | pano_01045_1_0 72 | pano_01052_5_0 73 | pano_01053_2_0 74 | pano_01057_0_180 75 | pano_01067_0_0 76 | pano_01113_1_0 77 | pano_01121_1_0 78 | pano_01125_2_180 79 | pano_01140_3_180 80 | pano_01195_0_180 81 | pano_01203_1_180 82 | pano_01205_0_0 83 | pano_01211_0_180 84 | pano_01223_1_0 85 | pano_01228_0_0 86 | pano_01228_3_180 87 | pano_01236_2_180 88 | pano_01260_1_0 89 | pano_01283_1_180 90 | pano_01295_0_0 91 | pano_01295_1_180 92 | pano_01311_2_0 93 | pano_01337_2_0 94 | pano_01361_1_0 95 | pano_01412_0_180 96 | pano_01444_1_0 97 | pano_01444_4_180 98 | pano_01470_1_180 99 | pano_01486_2_0 100 | pano_01507_1_0 101 | -------------------------------------------------------------------------------- /segmentation/datasets/NTHU_list/Taipei/List/test.txt: -------------------------------------------------------------------------------- 1 | pano_00000_0_0 2 | pano_00001_0_0 3 | pano_00002_0_180 4 | pano_00010_0_0 5 | pano_00011_0_0 6 | pano_00012_0_0 7 | pano_00012_0_180 8 | pano_00024_0_0 9 | pano_00028_0_180 10 | pano_00035_0_0 11 | pano_00035_0_180 12 | pano_00037_0_180 13 | pano_00040_0_0 14 | pano_00043_0_0 15 | pano_00074_0_0 16 | pano_00108_0_0 17 | pano_00109_0_180 18 | pano_00110_0_0 19 | pano_00113_0_0 20 | pano_00114_0_0 21 | pano_00116_0_0 22 | pano_00142_0_0 23 | pano_00143_0_180 24 | pano_00173_0_0 25 | pano_00173_0_180 26 | pano_00174_0_180 27 | pano_00176_0_180 28 | pano_00183_0_180 29 | pano_00184_0_180 30 | pano_00203_0_180 31 | pano_00205_2_0 32 | pano_00212_0_180 33 | pano_00213_0_180 34 | pano_00215_2_0 35 | pano_00218_0_0 36 | pano_00221_0_180 37 | 
pano_00222_0_180 38 | pano_00223_0_0 39 | pano_00223_0_180 40 | pano_00224_0_180 41 | pano_00225_0_180 42 | pano_00229_0_0 43 | pano_00230_0_0 44 | pano_00230_0_180 45 | pano_00232_0_0 46 | pano_00233_0_0 47 | pano_00237_0_0 48 | pano_00238_0_0 49 | pano_00240_0_0 50 | pano_00245_0_0 51 | pano_00246_0_0 52 | pano_00247_0_180 53 | pano_00260_0_0 54 | pano_00260_0_180 55 | pano_00261_0_180 56 | pano_00267_0_0 57 | pano_00270_0_0 58 | pano_00271_0_0 59 | pano_00271_0_180 60 | pano_00272_0_0 61 | pano_00308_0_0 62 | pano_00310_0_0 63 | pano_00315_0_0 64 | pano_00315_0_180 65 | pano_00316_0_0 66 | pano_00317_0_0 67 | pano_00318_0_0 68 | pano_00323_0_0 69 | pano_00325_0_0 70 | pano_00325_0_180 71 | pano_00327_0_0 72 | pano_00327_0_180 73 | pano_00334_0_0 74 | pano_00334_0_180 75 | pano_00335_0_0 76 | pano_00337_0_180 77 | pano_00439_0_180 78 | pano_00450_0_0 79 | pano_00463_0_0 80 | pano_00466_0_180 81 | pano_00481_0_0 82 | pano_00487_0_0 83 | pano_00493_0_0 84 | pano_00493_0_180 85 | pano_00496_0_180 86 | pano_00498_0_0 87 | pano_00499_0_0 88 | pano_00500_0_0 89 | pano_00504_0_0 90 | pano_00506_0_0 91 | pano_00509_0_0 92 | pano_00509_0_180 93 | pano_00516_0_180 94 | pano_00525_0_180 95 | pano_01135_0_0 96 | pano_01718_0_0 97 | pano_01726_0_0 98 | pano_01726_0_180 99 | pano_01727_0_0 100 | pano_01728_0_180 101 | -------------------------------------------------------------------------------- /segmentation/datasets/NTHU_list/Tokyo/List/test.txt: -------------------------------------------------------------------------------- 1 | pano_00002_2_0 2 | pano_00022_2_0 3 | pano_00027_0_0 4 | pano_00037_0_0 5 | pano_00042_1_180 6 | pano_00062_4_0 7 | pano_00068_1_180 8 | pano_00076_3_0 9 | pano_00087_0_0 10 | pano_00093_3_180 11 | pano_00111_0_180 12 | pano_00111_4_180 13 | pano_00117_3_180 14 | pano_00176_2_0 15 | pano_00188_1_0 16 | pano_00204_1_0 17 | pano_00236_2_0 18 | pano_00270_2_0 19 | pano_00272_0_180 20 | pano_00320_0_180 21 | pano_00328_2_180 22 | pano_00335_3_180 23 | 
pano_00340_2_0 24 | pano_00374_3_0 25 | pano_00391_4_0 26 | pano_00405_6_0 27 | pano_00566_3_0 28 | pano_00574_0_180 29 | pano_00585_2_180 30 | pano_00588_0_180 31 | pano_00590_1_0 32 | pano_00595_0_180 33 | pano_00601_1_180 34 | pano_00607_3_180 35 | pano_00623_1_0 36 | pano_00635_3_180 37 | pano_00645_3_0 38 | pano_00647_1_180 39 | pano_00650_3_180 40 | pano_00654_3_180 41 | pano_00665_3_0 42 | pano_00682_2_180 43 | pano_00683_0_180 44 | pano_00695_1_180 45 | pano_00709_1_0 46 | pano_00720_4_0 47 | pano_00723_2_180 48 | pano_00749_0_180 49 | pano_00750_6_180 50 | pano_00760_1_180 51 | pano_00760_4_0 52 | pano_00780_2_180 53 | pano_00792_1_180 54 | pano_00810_2_180 55 | pano_00821_1_0 56 | pano_00826_0_180 57 | pano_00870_3_0 58 | pano_00883_1_0 59 | pano_00894_1_180 60 | pano_00914_1_0 61 | pano_00915_2_180 62 | pano_00926_1_180 63 | pano_00940_2_0 64 | pano_00997_1_0 65 | pano_01022_3_0 66 | pano_01033_1_0 67 | pano_01048_2_0 68 | pano_01081_0_0 69 | pano_01098_3_180 70 | pano_01133_3_0 71 | pano_01143_1_0 72 | pano_01206_0_0 73 | pano_01208_3_180 74 | pano_01219_2_0 75 | pano_01222_2_0 76 | pano_01347_1_180 77 | pano_01349_1_0 78 | pano_01352_0_0 79 | pano_01360_0_180 80 | pano_01364_2_0 81 | pano_01375_0_180 82 | pano_01378_1_0 83 | pano_01379_0_180 84 | pano_01390_0_0 85 | pano_01404_3_0 86 | pano_01405_2_0 87 | pano_01410_0_0 88 | pano_01413_1_0 89 | pano_01418_4_180 90 | pano_01428_1_0 91 | pano_01433_1_180 92 | pano_01437_2_180 93 | pano_01446_2_0 94 | pano_01516_1_0 95 | pano_01520_1_0 96 | pano_01521_0_0 97 | pano_01528_0_180 98 | pano_01535_1_0 99 | pano_01536_3_0 100 | pano_01548_0_180 101 | -------------------------------------------------------------------------------- /segmentation/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | path = os.path.dirname(os.path.abspath(__file__)) 5 | 6 | for py in [f[:-3] for f in os.listdir(path) if f.endswith('.py') and f != 
import os
import sys

# Package initialiser: import every sibling module of this package and
# re-export each class found in it as an attribute of the package itself.
path = os.path.dirname(os.path.abspath(__file__))

for fname in os.listdir(path):
    # only plain module files, and never this initialiser
    if fname == '__init__.py' or not fname.endswith('.py'):
        continue
    py = fname[:-3]
    mod = __import__(__name__ + '.' + py, fromlist=[py])
    classes = [obj for obj in (getattr(mod, x) for x in dir(mod))
               if isinstance(obj, type)]
    for cls in classes:
        setattr(sys.modules[__name__], cls.__name__, cls)
import os
import sys

# Package initialiser: import every sibling module of this package and
# re-export each class found in it as an attribute of the package itself.
path = os.path.dirname(os.path.abspath(__file__))

for fname in os.listdir(path):
    # only plain module files, and never this initialiser
    if fname == '__init__.py' or not fname.endswith('.py'):
        continue
    py = fname[:-3]
    mod = __import__(__name__ + '.' + py, fromlist=[py])
    classes = [obj for obj in (getattr(mod, x) for x in dir(mod))
               if isinstance(obj, type)]
    for cls in classes:
        setattr(sys.modules[__name__], cls.__name__, cls)
def get_model(args):
    """Build the segmentation network selected by ``args.backbone``.

    Supported backbones are ``"deeplabv2_multi"`` (``DeeplabMulti``) and
    ``"deeplabv2_multi_snr"`` (``DeeplabMulti_SNR``). As a side effect
    ``args.numpy_transform`` is set to True.

    Args:
        args: Options object providing ``backbone``, ``num_classes`` and
            ``imagenet_pretrained``; it is also passed to the model's
            ``optim_parameters``.

    Returns:
        A ``(model, params)`` tuple, where ``params`` is the value returned
        by ``model.optim_parameters(args)``.

    Raises:
        ValueError: If ``args.backbone`` is not a supported name. Without
            this check the function failed at ``return`` with an unbound
            local variable.
    """
    if args.backbone == "deeplabv2_multi":
        model_cls = DeeplabMulti
    elif args.backbone == "deeplabv2_multi_snr":
        model_cls = DeeplabMulti_SNR
    else:
        raise ValueError(
            "Unsupported backbone '{}'; expected 'deeplabv2_multi' or "
            "'deeplabv2_multi_snr'".format(args.backbone))

    # Both backbones share the same construction and set-up steps.
    model = model_cls(num_classes=args.num_classes,
                      pretrained=args.imagenet_pretrained)
    params = model.optim_parameters(args)
    args.numpy_transform = True
    return model, params