├── .flake8 ├── .gitignore ├── INSTALL.md ├── LICENSE ├── MODEL_ZOO.md ├── README.md ├── configs ├── caffe2 │ ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml │ ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml │ ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml │ └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml ├── cityscapes │ ├── e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml ├── e2e_faster_rcnn_R_101_FPN_1x.yaml ├── e2e_faster_rcnn_R_50_C4_1x.yaml ├── e2e_faster_rcnn_R_50_C4_1x_ICDAR13.yaml ├── e2e_faster_rcnn_R_50_C4_1x_ICDAR13_test.yaml ├── e2e_faster_rcnn_R_50_FPN_1x.yaml ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml ├── e2e_mask_rcnn_R_101_FPN_1x.yaml ├── e2e_mask_rcnn_R_50_C4_1x.yaml ├── e2e_mask_rcnn_R_50_FPN_1x.yaml ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── e2e_rrpn_R_50_C4_1x_ICDAR13_15_trial.yaml ├── e2e_rrpn_R_50_C4_1x_ICDAR13_test.yaml ├── e2e_rrpn_R_50_C4_1x_ICDAR13_toy.yaml ├── gn_baselines │ ├── e2e_faster_rcnn_R_50_FPN_1x_gn.yaml │ ├── e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x_gn.yaml │ ├── e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ ├── scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml │ ├── scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml │ ├── scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml │ └── scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml ├── pascal_voc │ ├── e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml │ ├── e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml ├── quick_schedules │ ├── e2e_faster_rcnn_R_50_C4_quick.yaml │ ├── e2e_faster_rcnn_R_50_FPN_quick.yaml │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml │ ├── e2e_mask_rcnn_R_50_C4_quick.yaml │ ├── e2e_mask_rcnn_R_50_FPN_quick.yaml │ ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml │ ├── rpn_R_50_C4_quick.yaml │ └── rpn_R_50_FPN_quick.yaml └── rrpn │ ├── e2e_rrpn_R_50_C4_1x_ICDAR13_15_17_trial.yaml │ └── e2e_rrpn_R_50_C4_1x_ICDAR13_15_17_trial_again_test.yaml ├── demo ├── ICDAR_demo.py ├── README.md ├── RRPN_Demo.py ├── __init__.py ├── link_boxes.py ├── polygon_wrapper.py ├── predictor.py ├── results │ ├── rrpn1.png │ ├── rrpn2.png │ ├── rrpn3.png │ ├── rrpn4.png │ └── rrpn5.png ├── rrpn_e2e_infer.py └── webcam.py ├── docker ├── Dockerfile └── docker-jupyter │ ├── Dockerfile │ └── jupyter_notebook_config.py ├── maskrcnn_benchmark ├── __init__.py ├── config │ ├── __init__.py │ ├── defaults.py │ └── paths_catalog.py ├── csrc │ ├── ROIAlign.h │ ├── ROIPool.h │ ├── RROIAlign.h │ ├── cpu │ │ ├── ROIAlign_cpu.cpp │ │ ├── nms_cpu.cpp │ │ └── vision.h │ ├── cuda │ │ ├── ROIAlign_cuda.cu │ │ ├── ROIPool_cuda.cu │ │ ├── RROIAlign_cuda.cu │ │ ├── nms.cu │ │ └── vision.h │ ├── nms.h │ └── vision.cpp ├── data │ ├── README.md │ ├── __init__.py │ ├── build.py │ ├── collate_batch.py │ ├── datasets │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── concat_dataset.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── coco │ │ │ │ ├── __init__.py │ │ │ │ └── coco_eval.py │ │ │ └── voc │ │ │ │ ├── __init__.py │ │ │ │ └── voc_eval.py │ │ ├── icdar_series.py │ │ ├── list_dataset.py │ │ ├── rotation_mask_datasets.py │ │ ├── rotation_series.py │ │ ├── rrpn_e2e_series.py │ │ └── voc.py │ ├── icdar_series.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── grouped_batch_sampler.py │ │ └── iteration_based_batch_sampler.py │ └── transforms │ │ ├── 
__init__.py │ │ ├── build.py │ │ └── transforms.py ├── engine │ ├── __init__.py │ ├── inference.py │ └── trainer.py ├── layers │ ├── __init__.py │ ├── _utils.py │ ├── batch_norm.py │ ├── misc.py │ ├── nms.py │ ├── roi_align.py │ ├── roi_pool.py │ ├── rroi_align.py │ ├── rroi_pool.py │ └── smooth_l1_loss.py ├── modeling │ ├── __init__.py │ ├── arpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── rpn.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── fpn.py │ │ └── resnet.py │ ├── balanced_positive_negative_sampler.py │ ├── box_coder.py │ ├── detector │ │ ├── __init__.py │ │ ├── detectors.py │ │ ├── generalized_rcnn.py │ │ └── generalized_rrpn_rcnn.py │ ├── make_layers.py │ ├── matcher.py │ ├── poolers.py │ ├── rbox_coder.py │ ├── registry.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── box_head │ │ │ ├── __init__.py │ │ │ ├── box_head.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── roi_box_feature_extractors.py │ │ │ └── roi_box_predictors.py │ │ ├── mask_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── mask_head.py │ │ │ ├── roi_mask_feature_extractors.py │ │ │ └── roi_mask_predictors.py │ │ ├── rbox_head │ │ │ ├── __init__.py │ │ │ ├── box_head.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── roi_box_feature_extractors.py │ │ │ └── roi_box_predictors.py │ │ ├── rec_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── rec_head.py │ │ │ ├── roi_rec_feature_extractors.py │ │ │ └── roi_rec_predictors.py │ │ ├── rmask_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── mask_head.py │ │ │ ├── roi_mask_feature_extractors.py │ │ │ └── roi_mask_predictors.py │ │ ├── roi_heads.py │ │ └── rroi_heads.py │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── rpn.py │ ├── rrpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── rrpn.py │ └── utils.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── bounding_box.py │ ├── boxlist_ops.py │ ├── image_list.py │ ├── rboxlist_ops.py │ ├── segmentation_for_rbox.py │ └── segmentation_mask.py └── utils │ ├── README.md │ ├── __init__.py │ ├── c2_model_loading.py │ ├── checkpoint.py │ ├── collect_env.py │ ├── comm.py │ ├── cv2_util.py │ ├── env.py │ ├── imports.py │ ├── logger.py │ ├── metric_logger.py │ ├── miscellaneous.py │ ├── model_serialization.py │ ├── model_zoo.py │ ├── rec_utils.py │ ├── registry.py │ └── visualize.py ├── rotation ├── __init__.py ├── rbbox_overlaps.cpp ├── rbbox_overlaps.hpp ├── rbbox_overlaps.pyx ├── rbbox_overlaps_kernel.cu ├── rotate_circle_nms.c ├── rotate_circle_nms.pyx ├── rotate_cpu_nms.py ├── rotate_cpython_nms.pyx ├── rotate_cython_nms.c ├── rotate_cython_nms.pyx ├── rotate_gpu_nms.cpp ├── rotate_gpu_nms.hpp ├── rotate_gpu_nms.pyx ├── rotate_nms_kernel.cu ├── rotate_polygon_nms.cpp ├── rotate_polygon_nms.hpp ├── rotate_polygon_nms.pyx └── rotate_polygon_nms_kernel.cu ├── rotation_setup.py ├── setup.py ├── tests ├── checkpoint.py ├── test_data_samplers.py └── test_metric_logger.py └── tools ├── cityscapes ├── convert_cityscapes_to_coco.py └── instances2dict_with_polygons.py ├── test_net.py └── train_net.py /.flake8: -------------------------------------------------------------------------------- 1 | # This is an example .flake8 config, used when developing *Black* itself. 2 | # Keep in sync with setup.cfg which is used for source packages. 
3 | 4 | [flake8] 5 | ignore = E203, E266, E501, W503 6 | max-line-length = 80 7 | max-complexity = 18 8 | select = B,C,E,F,W,T4,B9 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # compilation and distribution 2 | __pycache__ 3 | _ext 4 | *.pyc 5 | *.so 6 | maskrcnn_benchmark.egg-info/ 7 | build/ 8 | dist/ 9 | 10 | # pytorch/python/numpy formats 11 | *.pth 12 | *.pkl 13 | *.npy 14 | 15 | # ipython/jupyter notebooks 16 | *.ipynb 17 | **/.ipynb_checkpoints/ 18 | 19 | # Editor temporaries 20 | *.swn 21 | *.swo 22 | *.swp 23 | *~ 24 | 25 | # Pycharm editor settings 26 | .idea 27 | 28 | # project dirs 29 | /datasets 30 | /models 31 | -------------------------------------------------------------------------------- /INSTALL.md: --------------------------------------------------------------------------------

## Installation

### Requirements:
- PyTorch 1.0 from a nightly release. Installation instructions can be found at https://pytorch.org/get-started/locally/
- torchvision from master
- cocoapi
- yacs
- matplotlib
- GCC >= 4.9
- (optional) OpenCV for the webcam demo

```bash
# first, make sure that your conda is set up properly with the right environment
# for that, check that `which conda`, `which pip` and `which python` point to the
# right paths. From a clean conda env, this is what you need to do

conda create --name rrpn_pytorch
source activate rrpn_pytorch

# this installs the right pip and dependencies for the fresh python
conda install ipython

# maskrcnn_benchmark and coco api dependencies
pip install ninja yacs cython matplotlib

# follow the PyTorch installation instructions at https://pytorch.org/get-started/locally/
# we give the instructions for CUDA 9.0
conda install pytorch

# install torchvision
cd ~/github
git clone https://github.com/pytorch/vision.git
cd vision
python setup.py install

# install pycocotools
cd ~/github
git clone https://github.com/cocodataset/cocoapi.git
cd cocoapi/PythonAPI
python setup.py build_ext install

# install PyTorch Detection
cd ~/github
git clone https://github.com/mjq11302010044/RRPN_pytorch.git
cd RRPN_pytorch
# the following will install the lib with
# symbolic links, so that you can modify
# the files if you want and won't need to
# rebuild it
python setup.py build develop

#-------
python rotation_setup.py install
mv build/lib/rotation/*.so ./rotation
#-------

# or if you are on macOS
# MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop
```
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Facebook 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following
conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

# RRPN_pytorch
RRPN in pytorch, implemented under the framework of Facebook's maskrcnn-benchmark: https://github.com/facebookresearch/maskrcnn-benchmark.
Its caffe version can be viewed at: https://github.com/mjq11302010044/RRPN.

This repo has been deprecated; please see our new repo RRPN++ (https://github.com/mjq11302010044/RRPN_plusplus).

![alt text](demo/results/rrpn3.png "Results from IC15 testing set")

## Highlights
- **From the original repo:** Now in PyTorch 1.0, and somewhat faster than the original repo in both training and inference.
- **Training and evaluation checked:** Tested on IC15 with training data from {IC13, IC15, IC17mlt}, reaching an F-score of 83% vs. 81% for the caffe repo.
- **What's new:** RRoI Pooling is replaced with RRoI Alignment (bilinear interpolation for sampling), the FPN structure is supported, and it is easy to swap in various backbones for different purposes.

## Installation

Check [INSTALL.md](INSTALL.md) for installation instructions.

## Configuring your dataset
- Your dataset path can be set in `$RRPN_ROOT/maskrcnn_benchmark/config/paths_catalog.py`. We implemented interfaces for {IC13, IC15, IC17mlt, LSVT, ArT} for common use (starting from line 96):
```python
...
"RRPN_train": {  # including IC13 and IC15
    'dataset_list':{
        # 'IC13': 'Your dataset path',
        ...
    },
    "split": 'train'
},
...
```
- Adding your own dataset?
You need to form a dict array as follows (a minimal loader sketch is given below, after the Training section):
```python
im_info = {
    'gt_classes': your class_id array,
    'max_classes': your class_id array,
    'image': path to access one image,
    'boxes': rotated boxes in {cx, cy, w, h, θ},
    'flipped': not supported, just False,
    'gt_overlaps': overlaps filled with 1 (gt with gt),
    'seg_areas': H * W for an rbox,
    'height': height of an image,
    'width': width of an image,
    'max_overlaps': overlaps filled with 1 (gt with gt),
    'rotated': just True
}
```
Examples can be seen in `$RRPN_ROOT/maskrcnn_benchmark/data/rotation_series.py`.
Your data API should be added to the variable `DATASET`:
```python
DATASET = {
    'IC13': get_ICDAR2013,
    'IC15': get_ICDAR2015_RRC_PICK_TRAIN,
    'IC17mlt': get_ICDAR2017_mlt,
    ...
    'Your Dataset Name': 'Your Dataset API'
}
```

## Training
```bash
# create your data cache directory
mkdir data_cache
```
```bash
# In your root of RRPN
python tools/train_net.py --config-file=configs/rrpn/e2e_rrpn_R_50_C4_1x_ICDAR13_15_17_trial.yaml
```
- Multi-GPU training has not been tested yet; be careful when using more than one GPU.
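To make the `im_info` format above concrete, here is a minimal sketch of a dataset API function that could be registered in `DATASET`. The directory layout, file naming, the single text class id, and the `gt_overlaps` shape are assumptions of this sketch rather than the repo's fixed API, so adapt the parsing to your own annotations:

```python
import os
import numpy as np
from PIL import Image


def get_MyDataset(dataset_dir):
    """Hypothetical loader: assumes `images/` holds the pictures and `gts/`
    holds one comma-separated 'cx,cy,w,h,theta' line per rotated box."""
    im_infos = []
    img_dir = os.path.join(dataset_dir, 'images')
    gt_dir = os.path.join(dataset_dir, 'gts')
    for name in sorted(os.listdir(img_dir)):
        img_path = os.path.join(img_dir, name)
        width, height = Image.open(img_path).size
        gt_path = os.path.join(gt_dir, os.path.splitext(name)[0] + '.txt')
        boxes = []
        with open(gt_path) as f:
            for line in f:
                cx, cy, w, h, theta = map(float, line.strip().split(','))
                boxes.append([cx, cy, w, h, theta])
        if not boxes:
            continue  # skip images without annotations
        boxes = np.array(boxes, dtype=np.float32)
        num = boxes.shape[0]
        gt_classes = np.ones(num, dtype=np.int64)  # one foreground class: text
        im_infos.append({
            'gt_classes': gt_classes,
            'max_classes': gt_classes,
            'image': img_path,
            'boxes': boxes,                                     # {cx, cy, w, h, θ}
            'flipped': False,                                   # flipping not supported
            'gt_overlaps': np.ones(num, dtype=np.float32),      # gt-vs-gt overlap = 1
            'seg_areas': boxes[:, 2] * boxes[:, 3],             # H * W per rbox
            'height': height,
            'width': width,
            'max_overlaps': np.ones(num, dtype=np.float32),
            'rotated': True,
        })
    return im_infos
```

It could then be registered as `'MyDataset': get_MyDataset` in the `DATASET` dict of `rotation_series.py`.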
## Testing
- Use `$RRPN_ROOT/demo/RRPN_Demo.py` to test the images you want. The demo will generate a text file with the detected coordinates for each image.
- Show the detected image by turning the variable `vis` to True.

## Final
- Enjoy the code.
- Please cite us if it works for your projects.
```
@misc{ma2019rrpn,
  author = {Jianqi Ma},
  title = {{RRPN in pytorch}},
  year = {2019},
  howpublished = {\url{https://github.com/mjq11302010044/RRPN_pytorch}},
}
@article{Jianqi17RRPN,
  author = {Jianqi Ma and Weiyuan Shao and Hao Ye and Li Wang and Hong Wang and Yingbin Zheng and Xiangyang Xue},
  title = {Arbitrary-Oriented Scene Text Detection via Rotation Proposals},
  journal = {IEEE Transactions on Multimedia},
  volume = {20},
  number = {11},
  pages = {3111-3122},
  year = {2018}
}
```
-------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 23 | TEST: ("coco_2014_minival",) 24 | DATALOADER: 25 | SIZE_DIVISIBILITY: 32 26 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x" 4 | DATASETS: 5 | TEST: ("coco_2014_minival",) 6 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 23 | TEST: ("coco_2014_minival",) 24 | DATALOADER: 25 | SIZE_DIVISIBILITY: 32 26 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT:
"catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | RESNETS: 23 | STRIDE_IN_1X1: False 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | DATASETS: 27 | TEST: ("coco_2014_minival",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TEST: ("coco_2014_minival",) 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x" 4 | ROI_MASK_HEAD: 5 | PREDICTOR: "MaskRCNNC4Predictor" 6 | SHARE_BOX_FEATURE_EXTRACTOR: True 7 | MASK_ON: True 8 | DATASETS: 9 | TEST: ("coco_2014_minival",) 10 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | 
POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TEST: ("coco_2014_minival",) 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | RESNETS: 31 | STRIDE_IN_1X1: False 32 | NUM_GROUPS: 32 33 | WIDTH_PER_GROUP: 8 34 | MASK_ON: True 35 | DATASETS: 36 | TEST: ("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /configs/cityscapes/e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | NUM_CLASSES: 9 23 | DATASETS: 24 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 25 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",) 26 | DATALOADER: 27 | SIZE_DIVISIBILITY: 32 28 | SOLVER: 29 | BASE_LR: 0.01 30 | WEIGHT_DECAY: 0.0001 31 | STEPS: (18000,) 32 | MAX_ITER: 24000 33 | -------------------------------------------------------------------------------- /configs/cityscapes/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | NUM_CLASSES: 9 23 | ROI_MASK_HEAD: 24 | 
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 34 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (18000,) 41 | MAX_ITER: 24000 42 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 23 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | SOLVER: 28 | BASE_LR: 0.02 29 | WEIGHT_DECAY: 0.0001 30 | STEPS: (60000, 80000) 31 | MAX_ITER: 90000 32 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | DATASETS: 8 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 9 | TEST: ("coco_2014_minival",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WEIGHT_DECAY: 0.0001 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | IMS_PER_BATCH: 8 16 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_C4_1x_ICDAR13.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | DATASETS: 8 | TRAIN: ("ICDAR2013Dataset", ) 9 | TEST: ("ICDAR2013Dataset",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WEIGHT_DECAY: 0.0001 13 | STEPS: (20000, 60000) 14 | MAX_ITER: 80000 15 | IMS_PER_BATCH: 1 16 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_C4_1x_ICDAR13_test.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "../model_0020000.pth" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | DATASETS: 8 | TRAIN: ("ICDAR2013Dataset", ) 9 | TEST: ("ICDAR2013Dataset",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WEIGHT_DECAY: 0.0001 13 | STEPS: (20000, 60000) 14 | MAX_ITER: 80000 15 | IMS_PER_BATCH: 4 16 | -------------------------------------------------------------------------------- 
/configs/e2e_faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 23 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | SOLVER: 28 | BASE_LR: 0.02 29 | WEIGHT_DECAY: 0.0001 30 | STEPS: (60000, 80000) 31 | MAX_ITER: 90000 32 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | RESNETS: 23 | STRIDE_IN_1X1: False 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.01 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (120000, 160000) 35 | MAX_ITER: 180000 36 | IMS_PER_BATCH: 8 37 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | BASE_LR: 0.02 38 | WEIGHT_DECAY: 0.0001 39 | STEPS: (60000, 80000) 40 | MAX_ITER: 
90000 41 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_MASK_HEAD: 8 | PREDICTOR: "MaskRCNNC4Predictor" 9 | SHARE_BOX_FEATURE_EXTRACTOR: True 10 | MASK_ON: True 11 | DATASETS: 12 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 13 | TEST: ("coco_2014_minival",) 14 | SOLVER: 15 | BASE_LR: 0.01 16 | WEIGHT_DECAY: 0.0001 17 | STEPS: (120000, 160000) 18 | MAX_ITER: 180000 19 | IMS_PER_BATCH: 8 20 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | BASE_LR: 0.02 38 | WEIGHT_DECAY: 0.0001 39 | STEPS: (60000, 80000) 40 | MAX_ITER: 90000 41 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | RESNETS: 31 | STRIDE_IN_1X1: False 32 | NUM_GROUPS: 32 33 | WIDTH_PER_GROUP: 8 34 | MASK_ON: True 35 | DATASETS: 36 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 37 | TEST: ("coco_2014_minival",) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.01 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: 
(120000, 160000) 44 | MAX_ITER: 180000 45 | IMS_PER_BATCH: 8 46 | -------------------------------------------------------------------------------- /configs/e2e_rrpn_R_50_C4_1x_ICDAR13_15_trial.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RRPN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | RPN_HEAD: "SingleConvRRPNHead" 8 | 9 | ROI_BOX_HEAD: 10 | NUM_CLASSES: 2 11 | ROI_HEADS: 12 | BATCH_SIZE_PER_IMAGE: 256 13 | 14 | DATASETS: 15 | TRAIN: ("RRPN_train", ) 16 | SOLVER: 17 | BASE_LR: 0.005 18 | WEIGHT_DECAY: 0.0001 19 | STEPS: (50000, 100000) 20 | MAX_ITER: 150000 21 | IMS_PER_BATCH: 1 22 | 23 | OUTPUT_DIR: './models/IC-13-15-Trial/' 24 | -------------------------------------------------------------------------------- /configs/e2e_rrpn_R_50_C4_1x_ICDAR13_test.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RRPN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | RPN_HEAD: "SingleConvRRPNHead" 8 | 9 | ROI_BOX_HEAD: 10 | NUM_CLASSES: 2 11 | ROI_HEADS: 12 | BATCH_SIZE_PER_IMAGE: 128 13 | 14 | DATASETS: 15 | TRAIN: ("RRPN_train", ) 16 | SOLVER: 17 | BASE_LR: 0.003 18 | WEIGHT_DECAY: 0.0001 19 | STEPS: (20000, 60000) 20 | MAX_ITER: 80000 21 | IMS_PER_BATCH: 1 22 | 23 | -------------------------------------------------------------------------------- /configs/e2e_rrpn_R_50_C4_1x_ICDAR13_toy.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RRPN" 3 | WEIGHT: "./model_0030000.pth" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | RPN_HEAD: "SingleConvRRPNHead" 8 | 9 | ROI_BOX_HEAD: 10 | NUM_CLASSES: 2 11 | 12 | DATALOADER: 13 | SIZE_DIVISIBILITY: 1 14 | 15 | DATASETS: 16 | TRAIN: ("RRPN_train", ) 17 | TEST: ("RRPN_train",) 18 | SOLVER: 19 | BASE_LR: 0.003 20 | WEIGHT_DECAY: 0.0001 21 | STEPS: (20000, 60000) 22 | MAX_ITER: 80000 23 | IMS_PER_BATCH: 1 24 | 25 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | RESNETS: # use GN for backbone 13 | TRANS_FUNC: "BottleneckWithGN" 14 | STEM_FUNC: "StemWithGN" 15 | FPN: 16 | USE_GN: True # use GN for FPN 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | BATCH_SIZE_PER_IMAGE: 512 27 | POSITIVE_FRACTION: 0.25 28 | ROI_BOX_HEAD: 29 | USE_GN: True # use GN for bbox head 30 | POOLER_RESOLUTION: 7 31 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 34 | PREDICTOR: "FPNPredictor" 35 | DATASETS: 36 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 37 | TEST: ("coco_2014_minival",) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | # Assume 8 gpus 42 | 
BASE_LR: 0.02 43 | WEIGHT_DECAY: 0.0001 44 | STEPS: (60000, 80000) 45 | MAX_ITER: 90000 46 | IMS_PER_BATCH: 16 47 | TEST: 48 | IMS_PER_BATCH: 8 49 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | RESNETS: # use GN for backbone 13 | TRANS_FUNC: "BottleneckWithGN" 14 | STEM_FUNC: "StemWithGN" 15 | FPN: 16 | USE_GN: True # use GN for FPN 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | BATCH_SIZE_PER_IMAGE: 512 27 | POSITIVE_FRACTION: 0.25 28 | ROI_BOX_HEAD: 29 | USE_GN: True # use GN for bbox head 30 | POOLER_RESOLUTION: 7 31 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | CONV_HEAD_DIM: 256 34 | NUM_STACKED_CONVS: 4 35 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 36 | PREDICTOR: "FPNPredictor" 37 | DATASETS: 38 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 39 | TEST: ("coco_2014_minival",) 40 | DATALOADER: 41 | SIZE_DIVISIBILITY: 32 42 | SOLVER: 43 | # Assume 8 gpus 44 | BASE_LR: 0.02 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (60000, 80000) 47 | MAX_ITER: 90000 48 | IMS_PER_BATCH: 16 49 | TEST: 50 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | RESNETS: # use GN for backbone 13 | TRANS_FUNC: "BottleneckWithGN" 14 | STEM_FUNC: "StemWithGN" 15 | FPN: 16 | USE_GN: True # use GN for FPN 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | BATCH_SIZE_PER_IMAGE: 512 27 | POSITIVE_FRACTION: 0.25 28 | ROI_BOX_HEAD: 29 | USE_GN: True # use GN for bbox head 30 | POOLER_RESOLUTION: 7 31 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 34 | PREDICTOR: "FPNPredictor" 35 | ROI_MASK_HEAD: 36 | USE_GN: True # use GN for mask head 37 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 38 | CONV_LAYERS: (256, 256, 256, 256) 39 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 40 | PREDICTOR: "MaskRCNNC4Predictor" 41 | POOLER_RESOLUTION: 14 42 | POOLER_SAMPLING_RATIO: 2 43 | RESOLUTION: 28 44 | SHARE_BOX_FEATURE_EXTRACTOR: False 45 | MASK_ON: True 46 | DATASETS: 47 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 48 | TEST: ("coco_2014_minival",) 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | # Assume 8 gpus 53 | BASE_LR: 0.02 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (60000, 80000) 
56 | MAX_ITER: 90000 57 | IMS_PER_BATCH: 16 58 | TEST: 59 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | RESNETS: # use GN for backbone 13 | TRANS_FUNC: "BottleneckWithGN" 14 | STEM_FUNC: "StemWithGN" 15 | FPN: 16 | USE_GN: True # use GN for FPN 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | BATCH_SIZE_PER_IMAGE: 512 27 | POSITIVE_FRACTION: 0.25 28 | ROI_BOX_HEAD: 29 | USE_GN: True # use GN for bbox head 30 | POOLER_RESOLUTION: 7 31 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | CONV_HEAD_DIM: 256 34 | NUM_STACKED_CONVS: 4 35 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 36 | PREDICTOR: "FPNPredictor" 37 | ROI_MASK_HEAD: 38 | USE_GN: True # use GN for mask head 39 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 40 | CONV_LAYERS: (256, 256, 256, 256) 41 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 42 | PREDICTOR: "MaskRCNNC4Predictor" 43 | POOLER_RESOLUTION: 14 44 | POOLER_SAMPLING_RATIO: 2 45 | RESOLUTION: 28 46 | SHARE_BOX_FEATURE_EXTRACTOR: False 47 | MASK_ON: True 48 | DATASETS: 49 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 50 | TEST: ("coco_2014_minival",) 51 | DATALOADER: 52 | SIZE_DIVISIBILITY: 32 53 | SOLVER: 54 | # Assume 8 gpus 55 | BASE_LR: 0.02 56 | WEIGHT_DECAY: 0.0001 57 | STEPS: (60000, 80000) 58 | MAX_ITER: 90000 59 | IMS_PER_BATCH: 16 60 | TEST: 61 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 13 | RESNETS: # use GN for backbone 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | DATASETS: 37 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 38 | TEST: ("coco_2014_minival",) 39 | DATALOADER: 40 | SIZE_DIVISIBILITY: 32 41 | SOLVER: 42 | # Assume 8 gpus 43 | BASE_LR: 0.02 44 | WEIGHT_DECAY: 0.0001 45 | STEPS: (210000, 250000) 46 | MAX_ITER: 270000 47 | 
IMS_PER_BATCH: 16 48 | TEST: 49 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 13 | RESNETS: # use GN for backbone 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | DATASETS: 39 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 40 | TEST: ("coco_2014_minival",) 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 8 gpus 45 | BASE_LR: 0.02 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (210000, 250000) 48 | MAX_ITER: 270000 49 | IMS_PER_BATCH: 16 50 | TEST: 51 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 13 | RESNETS: # use GN for backbone 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | ROI_MASK_HEAD: 37 | USE_GN: True # use GN for mask head 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | CONV_LAYERS: (256, 256, 256, 256) 40 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 41 | PREDICTOR: "MaskRCNNC4Predictor" 42 | POOLER_RESOLUTION: 14 43 | POOLER_SAMPLING_RATIO: 2 44 | RESOLUTION: 28 45 | SHARE_BOX_FEATURE_EXTRACTOR: False 46 | MASK_ON: True 47 | DATASETS: 48 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 49 | TEST: ("coco_2014_minival",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | # Assume 8 gpus 54 | BASE_LR: 0.02 55 | WEIGHT_DECAY: 0.0001 56 | STEPS: (210000, 250000) 57 | MAX_ITER: 
270000 58 | IMS_PER_BATCH: 16 59 | TEST: 60 | IMS_PER_BATCH: 8 61 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 13 | RESNETS: # use GN for backbone 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | ROI_MASK_HEAD: 39 | USE_GN: True # use GN for mask head 40 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 41 | CONV_LAYERS: (256, 256, 256, 256) 42 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 43 | PREDICTOR: "MaskRCNNC4Predictor" 44 | POOLER_RESOLUTION: 14 45 | POOLER_SAMPLING_RATIO: 2 46 | RESOLUTION: 28 47 | SHARE_BOX_FEATURE_EXTRACTOR: False 48 | MASK_ON: True 49 | DATASETS: 50 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 51 | TEST: ("coco_2014_minival",) 52 | DATALOADER: 53 | SIZE_DIVISIBILITY: 32 54 | SOLVER: 55 | # Assume 8 gpus 56 | BASE_LR: 0.02 57 | WEIGHT_DECAY: 0.0001 58 | STEPS: (210000, 250000) 59 | MAX_ITER: 270000 60 | IMS_PER_BATCH: 16 61 | TEST: 62 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 300 7 | ANCHOR_SIZES: (128, 256, 512) 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 21 10 | DATASETS: 11 | TRAIN: ("voc_2007_train", "voc_2007_val") 12 | TEST: ("voc_2007_test",) 13 | SOLVER: 14 | BASE_LR: 0.001 15 | WEIGHT_DECAY: 0.0001 16 | STEPS: (50000, ) 17 | MAX_ITER: 70000 18 | IMS_PER_BATCH: 1 19 | TEST: 20 | IMS_PER_BATCH: 1 21 | -------------------------------------------------------------------------------- /configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 300 7 | ANCHOR_SIZES: (128, 256, 512) 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 21 10 | DATASETS: 11 | TRAIN: ("voc_2007_train", "voc_2007_val") 12 | TEST: ("voc_2007_test",) 13 | SOLVER: 14 | BASE_LR: 0.004 15 | WEIGHT_DECAY: 0.0001 16 | STEPS: (12500, ) 17 | MAX_ITER: 17500 18 | IMS_PER_BATCH: 4 19 | TEST: 20 | IMS_PER_BATCH: 4 21 | 
-------------------------------------------------------------------------------- /configs/pascal_voc/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | NUM_CLASSES: 21 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("voc_2012_train_cocostyle",) 34 | TEST: ("voc_2012_val_cocostyle",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (18000,) 41 | MAX_ITER: 24000 42 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | DATASETS: 10 | TRAIN: ("coco_2014_minival",) 11 | TEST: ("coco_2014_minival",) 12 | INPUT: 13 | MIN_SIZE_TRAIN: 600 14 | MAX_SIZE_TRAIN: 1000 15 | MIN_SIZE_TEST: 800 16 | MAX_SIZE_TEST: 1000 17 | SOLVER: 18 | BASE_LR: 0.005 19 | WEIGHT_DECAY: 0.0001 20 | STEPS: (1500,) 21 | MAX_ITER: 2000 22 | IMS_PER_BATCH: 2 23 | TEST: 24 | IMS_PER_BATCH: 2 25 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | BATCH_SIZE_PER_IMAGE: 256 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TRAIN: ("coco_2014_minival",) 25 | TEST: ("coco_2014_minival",) 26 | INPUT: 27 | MIN_SIZE_TRAIN: 600 28 | MAX_SIZE_TRAIN: 1000 29 | MIN_SIZE_TEST: 800 30 | MAX_SIZE_TEST: 1000 31 | DATALOADER: 32 | SIZE_DIVISIBILITY: 32 33 | SOLVER: 34 | BASE_LR: 0.005 35 | WEIGHT_DECAY: 0.0001 36 | STEPS: (1500,) 37 | MAX_ITER: 2000 38 | IMS_PER_BATCH: 4 39 | TEST: 40 | IMS_PER_BATCH: 2 41 | -------------------------------------------------------------------------------- 
/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | BATCH_SIZE_PER_IMAGE: 256 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | RESNETS: 24 | STRIDE_IN_1X1: False 25 | NUM_GROUPS: 32 26 | WIDTH_PER_GROUP: 8 27 | DATASETS: 28 | TRAIN: ("coco_2014_minival",) 29 | TEST: ("coco_2014_minival",) 30 | INPUT: 31 | MIN_SIZE_TRAIN: 600 32 | MAX_SIZE_TRAIN: 1000 33 | MIN_SIZE_TEST: 800 34 | MAX_SIZE_TEST: 1000 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.005 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (1500,) 41 | MAX_ITER: 2000 42 | IMS_PER_BATCH: 2 43 | TEST: 44 | IMS_PER_BATCH: 2 45 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | ROI_MASK_HEAD: 10 | PREDICTOR: "MaskRCNNC4Predictor" 11 | SHARE_BOX_FEATURE_EXTRACTOR: True 12 | MASK_ON: True 13 | DATASETS: 14 | TRAIN: ("coco_2014_minival",) 15 | TEST: ("coco_2014_minival",) 16 | INPUT: 17 | MIN_SIZE_TRAIN: 600 18 | MAX_SIZE_TRAIN: 1000 19 | MIN_SIZE_TEST: 800 20 | MAX_SIZE_TEST: 1000 21 | SOLVER: 22 | BASE_LR: 0.005 23 | WEIGHT_DECAY: 0.0001 24 | STEPS: (1500,) 25 | MAX_ITER: 2000 26 | IMS_PER_BATCH: 4 27 | TEST: 28 | IMS_PER_BATCH: 2 29 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | BATCH_SIZE_PER_IMAGE: 256 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2014_minival",) 34 | TEST: ("coco_2014_minival",) 35 | INPUT: 36 | MIN_SIZE_TRAIN: 600 37 | MAX_SIZE_TRAIN: 1000 38 | MIN_SIZE_TEST: 800 39 | MAX_SIZE_TEST: 1000 40 | 
DATALOADER: 41 | SIZE_DIVISIBILITY: 32 42 | SOLVER: 43 | BASE_LR: 0.005 44 | WEIGHT_DECAY: 0.0001 45 | STEPS: (1500,) 46 | MAX_ITER: 2000 47 | IMS_PER_BATCH: 4 48 | TEST: 49 | IMS_PER_BATCH: 2 50 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | BATCH_SIZE_PER_IMAGE: 256 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | RESNETS: 32 | STRIDE_IN_1X1: False 33 | NUM_GROUPS: 32 34 | WIDTH_PER_GROUP: 8 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2014_minival",) 38 | TEST: ("coco_2014_minival",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: 600 41 | MAX_SIZE_TRAIN: 1000 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1000 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | BASE_LR: 0.005 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (1500,) 50 | MAX_ITER: 2000 51 | IMS_PER_BATCH: 2 52 | TEST: 53 | IMS_PER_BATCH: 2 54 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 12000 7 | POST_NMS_TOP_N_TEST: 2000 8 | DATASETS: 9 | TRAIN: ("coco_2014_minival",) 10 | TEST: ("coco_2014_minival",) 11 | INPUT: 12 | MIN_SIZE_TRAIN: 600 13 | MAX_SIZE_TRAIN: 1000 14 | MIN_SIZE_TEST: 800 15 | MAX_SIZE_TEST: 1000 16 | SOLVER: 17 | BASE_LR: 0.005 18 | WEIGHT_DECAY: 0.0001 19 | STEPS: (1500,) 20 | MAX_ITER: 2000 21 | IMS_PER_BATCH: 4 22 | TEST: 23 | IMS_PER_BATCH: 2 24 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 2000 13 | FPN_POST_NMS_TOP_N_TEST: 2000 14 | DATASETS: 15 | TRAIN: ("coco_2014_minival",) 16 | TEST: ("coco_2014_minival",) 17 | INPUT: 18 | MIN_SIZE_TRAIN: 600 19 | MAX_SIZE_TRAIN: 1000 20 | MIN_SIZE_TEST: 800 21 | MAX_SIZE_TEST: 1000 22 | DATALOADER: 23 | SIZE_DIVISIBILITY: 32 24 | SOLVER: 25 | BASE_LR: 0.005 26 | WEIGHT_DECAY: 0.0001 27 | STEPS: (1500,) 28 | MAX_ITER: 2000 29 | IMS_PER_BATCH: 4 30 | TEST: 31 | 
IMS_PER_BATCH: 2 32 | -------------------------------------------------------------------------------- /configs/rrpn/e2e_rrpn_R_50_C4_1x_ICDAR13_15_17_trial.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RRPN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | RPN_HEAD: "SingleConvRRPNHead" 8 | 9 | ROI_BOX_HEAD: 10 | NUM_CLASSES: 2 11 | ROI_HEADS: 12 | BATCH_SIZE_PER_IMAGE: 256 13 | 14 | DATASETS: 15 | TRAIN: ("RRPN_train", ) 16 | SOLVER: 17 | BASE_LR: 0.005 18 | WEIGHT_DECAY: 0.0001 19 | STEPS: (50000, 100000) 20 | MAX_ITER: 140000 21 | IMS_PER_BATCH: 1 22 | 23 | OUTPUT_DIR: './models/IC-13-15-17-Trial-renew/' 24 | -------------------------------------------------------------------------------- /configs/rrpn/e2e_rrpn_R_50_C4_1x_ICDAR13_15_17_trial_again_test.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RRPN" 3 | WEIGHT: "./models/IC-13-15-17-Trial-multiscale-picked-ft/model_0200000.pth" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | RPN_HEAD: "SingleConvRRPNHead" 8 | 9 | ROI_BOX_HEAD: 10 | NUM_CLASSES: 2 11 | ROI_HEADS: 12 | BATCH_SIZE_PER_IMAGE: 256 13 | NMS: 0.3 14 | DETECTIONS_PER_IMG: 300 15 | 16 | DATALOADER: 17 | SIZE_DIVISIBILITY: 1 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /demo/ICDAR_demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['CUDA_VISIBLE_DEVICES'] = '6' 3 | import cv2 4 | import numpy as np 5 | from maskrcnn_benchmark.config import cfg 6 | from predictor import ICDARDemo 7 | 8 | 9 | def write_result_ICDAR(im_file, dets, result_dir): 10 | file_spl = im_file.split('/') 11 | file_name = file_spl[len(file_spl) - 1] 12 | file_name_arr = file_name.split(".") 13 | 14 | file_name_str = file_name_arr[0] 15 | 16 | if not os.path.isdir(result_dir): 17 | os.makedirs(result_dir) 18 | 19 | result = os.path.join(result_dir, "res_" + file_name_str + ".txt") 20 | 21 | return_bboxes = [] 22 | 23 | if not os.path.isfile(result): 24 | os.mknod(result) 25 | result_file = open(result, "w") 26 | 27 | result_str = "" 28 | 29 | for idx in range(len(dets)): 30 | 31 | l, t, r, b = dets[idx].astype(np.int32)[0:4] 32 | 33 | rotated_pts = [ 34 | [l, t], [r, t], [r, b], [l, b] 35 | ] 36 | 37 | #det_str = str(int(rotated_pts[0][0])) + "," + str(int(rotated_pts[0][1])) + "," + \ 38 | # str(int(rotated_pts[1][0])) + "," + str(int(rotated_pts[1][1])) + "," + \ 39 | # str(int(rotated_pts[2][0])) + "," + str(int(rotated_pts[2][1])) + "," + \ 40 | # str(int(rotated_pts[3][0])) + "," + str(int(rotated_pts[3][1])) + "\r\n" 41 | 42 | # rotated_pts = rotated_pts[:,0:2] 43 | 44 | # if (dets[idx][5] > threshold): 45 | # rotated_pts = over_bound_handle(rotated_pts, height, width) 46 | det_str = str(int(l)) + "," + str(int(t)) + "," + \ 47 | str(int(r)) + "," + str(int(b)) + "\r\n" 48 | 49 | result_str = result_str + det_str 50 | return_bboxes.append(dets[idx]) 51 | 52 | # print rotated_pts.shape 53 | 54 | result_file.write(result_str) 55 | result_file.close() 56 | 57 | return return_bboxes 58 | 59 | 60 | model_file = 'text_IC13' 61 | 62 | result_dir = os.path.join('results', model_file) 63 | 64 | if not os.path.isdir(result_dir): 65 | os.makedirs(result_dir) 66 | 67 | config_file = "../configs/e2e_faster_rcnn_R_50_C4_1x_ICDAR13_test.yaml" 68 | 
print('config_file:', config_file) 69 | # update the config options with the config file 70 | cfg.merge_from_file(config_file) 71 | # manual override some options 72 | cfg.merge_from_list(["MODEL.DEVICE", "cuda"]) 73 | 74 | coco_demo = ICDARDemo( 75 | cfg, 76 | min_image_size=800, 77 | confidence_threshold=0.7, 78 | ) 79 | # load image and then run prediction 80 | image_dir = '../../datasets/ICDAR13/Challenge2_Test_Task12_Images/' 81 | 82 | imlist = os.listdir(image_dir) 83 | 84 | for image in imlist: 85 | impath = os.path.join(image_dir, image) 86 | print('image:', impath) 87 | img = cv2.imread(impath) 88 | predictions, bounding_boxes = coco_demo.run_on_opencv_image(img) 89 | # print('predictions:', predictions.shape) 90 | 91 | bboxes_np = bounding_boxes.bbox.data.cpu().numpy() 92 | write_result_ICDAR(image[:-4], bboxes_np, result_dir) 93 | #cv2.imshow('win', predictions) 94 | #cv2.waitKey(0) -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | ## Webcam and Jupyter notebook demo 2 | 3 | This folder contains a simple webcam demo that illustrates how you can use `maskrcnn_benchmark` for inference. 4 | 5 | 6 | ### With your preferred environment 7 | 8 | You can start it by running it from this folder, using one of the following commands: 9 | ```bash 10 | # by default, it runs on the GPU 11 | # for best results, use min-image-size 800 12 | python webcam.py --min-image-size 800 13 | # can also run it on the CPU 14 | python webcam.py --min-image-size 300 MODEL.DEVICE cpu 15 | # or change the model that you want to use 16 | python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu 17 | # in order to see the probability heatmaps, pass --show-mask-heatmaps 18 | python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu 19 | ``` 20 | 21 | ### With Docker 22 | 23 | Build the image with the tag `maskrcnn-benchmark` (check [INSTALL.md](../INSTALL.md) for instructions) 24 | 25 | Adjust permissions of the X server host (be careful with this step, refer to 26 | [here](http://wiki.ros.org/docker/Tutorials/GUI) for alternatives) 27 | 28 | ```bash 29 | xhost + 30 | ``` 31 | 32 | Then run a container with the demo: 33 | 34 | ``` 35 | docker run --rm -it \ 36 | -e DISPLAY=${DISPLAY} \ 37 | --privileged \ 38 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 39 | --device=/dev/video0:/dev/video0 \ 40 | --ipc=host maskrcnn-benchmark \ 41 | python demo/webcam.py --min-image-size 300 42 | ``` 43 | 44 | **DISCLAIMER:** *This was tested for an Ubuntu 16.04 machine, 45 | the volume mapping may vary depending on your platform* 46 | -------------------------------------------------------------------------------- /demo/RRPN_Demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from maskrcnn_benchmark.config import cfg 5 | from demo.predictor import ICDARDemo, RRPNDemo 6 | from maskrcnn_benchmark.utils.visualize import vis_image, write_result_ICDAR_RRPN2polys, zip_dir 7 | from PIL import Image 8 | import time 9 | 10 | 11 | config_file = "./configs/rrpn/e2e_rrpn_R_50_C4_1x_ICDAR13_15_17_trial_again_test.yaml" 12 | 13 | # update the config options with the config file 14 | cfg.merge_from_file(config_file) 15 | # manual override some options 16 | cfg.merge_from_list(["MODEL.DEVICE", "cuda"]) 17 | # cfg.freeze() 18 | # cfg.MODEL.WEIGHT 
= 'models/IC-13-15-17-Trial/model_0155000.pth' 19 | 20 | result_dir = os.path.join('results', config_file.split('/')[-1].split('.')[0], cfg.MODEL.WEIGHT.split('/')[-1].split('.')[0]) 21 | 22 | if not os.path.isdir(result_dir): 23 | os.makedirs(result_dir) 24 | 25 | 26 | coco_demo = RRPNDemo( 27 | cfg, 28 | min_image_size=1000, 29 | confidence_threshold=0.85, 30 | ) 31 | 32 | dataset_name = 'IC15' 33 | 34 | testing_dataset = { 35 | 'IC13': { 36 | 'testing_image_dir': '../datasets/ICDAR13/Challenge2_Test_Task12_Images', 37 | 'test_vocal_dir': '../datasets/ICDAR13/ch2_test_vocabularies_per_image' 38 | }, 39 | 'IC15': { 40 | 'testing_image_dir': '../datasets/ICDAR15/ch4_test_images', 41 | 'test_vocal_dir': '../datasets/ICDAR15/ch4_test_vocabularies_per_image' 42 | }, 43 | } 44 | 45 | image_dir = testing_dataset[dataset_name]['testing_image_dir'] 46 | vocab_dir = testing_dataset[dataset_name]['test_vocal_dir'] 47 | 48 | # load image and then run prediction 49 | # image_dir = '../datasets/ICDAR13/Challenge2_Test_Task12_Images/' 50 | imlist = os.listdir(image_dir) 51 | 52 | print('************* META INFO ***************') 53 | print('config_file:', config_file) 54 | print('result_dir:', result_dir) 55 | print('image_dir:', image_dir) 56 | print('weights:', cfg.MODEL.WEIGHT) 57 | print('***************************************') 58 | 59 | vis = True 60 | 61 | num_images = len(imlist) 62 | cnt = 0 63 | 64 | for image in imlist: 65 | impath = os.path.join(image_dir, image) 66 | # print('image:', impath) 67 | img = cv2.imread(impath) 68 | cnt += 1 69 | tic = time.time() 70 | predictions, bounding_boxes = coco_demo.run_on_opencv_image(img) 71 | toc = time.time() 72 | 73 | print('time cost:', str(toc - tic)[:6], '|', str(cnt) + '/' + str(num_images)) 74 | 75 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 76 | bboxes_np = bounding_boxes.bbox.data.cpu().numpy() 77 | bboxes_np[:, 2:4] /= cfg.MODEL.RRPN.GT_BOX_MARGIN 78 | 79 | width, height = bounding_boxes.size 80 | 81 | if vis: 82 | pil_image = vis_image(Image.fromarray(img), bboxes_np) 83 | pil_image.show() 84 | time.sleep(10) 85 | write_result_ICDAR_RRPN2polys(image[:-4], bboxes_np, threshold=0.7, result_dir=result_dir, height=height, width=width) 86 | #im_file, dets, threshold, result_dir, height, width 87 | #cv2.imshow('win', predictions) 88 | #cv2.waitKey(0) 89 | 90 | ''' 91 | if dataset_name == 'IC15': 92 | zipfilename = os.path.join(result_dir, 'submit_' + config_file.split('/')[-1].split('.')[0] + '_' + cfg.MODEL.WEIGHT.split('/')[-1].split('.')[0] + '.zip') 93 | if os.path.isfile(zipfilename): 94 | print('Zip file exists, removing it...') 95 | os.remove(zipfilename) 96 | zip_dir(result_dir, zipfilename) 97 | comm = 'curl -i -F "submissionFile=@' + zipfilename + '" http://127.0.0.1:8080/evaluate' 98 | # print(comm) 99 | print(os.popen(comm, 'r')) 100 | else: 101 | pass 102 | ''' -------------------------------------------------------------------------------- /demo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/demo/__init__.py -------------------------------------------------------------------------------- /demo/results/rrpn1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/demo/results/rrpn1.png 
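Annotation (added; not part of the repository): `RRPN_Demo.py` above works with 5-parameter rotated boxes, and it divides the predicted width/height by `cfg.MODEL.RRPN.GT_BOX_MARGIN` because ground-truth boxes appear to be enlarged by that margin during training (see `RandomRotation(..., gt_margin=...)` in `data/transforms/build.py`). A rough sketch of turning such a box into the 4-point polygon that `write_result_ICDAR_RRPN2polys` emits — assuming the `(cx, cy, w, h, angle-in-degrees)` layout; `rbox_to_polygon` is a hypothetical helper, not a function from this repo:

```python
import cv2
import numpy as np

def rbox_to_polygon(rbox):
    # Hypothetical helper: one (cx, cy, w, h, angle) box, angle in degrees,
    # to its 4 corner points as a (4, 2) integer array.
    cx, cy, w, h, angle = (float(v) for v in rbox[:5])
    pts = cv2.boxPoints(((cx, cy), (w, h), angle))  # corners of the RotatedRect
    return np.round(pts).astype(np.int32)

# As in the demo: undo the training-time margin, then export polygons.
# bboxes_np[:, 2:4] /= cfg.MODEL.RRPN.GT_BOX_MARGIN
# polys = [rbox_to_polygon(b) for b in bboxes_np]
```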
-------------------------------------------------------------------------------- /demo/results/rrpn2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/demo/results/rrpn2.png -------------------------------------------------------------------------------- /demo/results/rrpn3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/demo/results/rrpn3.png -------------------------------------------------------------------------------- /demo/results/rrpn4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/demo/results/rrpn4.png -------------------------------------------------------------------------------- /demo/results/rrpn5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/demo/results/rrpn5.png -------------------------------------------------------------------------------- /demo/webcam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import argparse 3 | import cv2 4 | 5 | from maskrcnn_benchmark.config import cfg 6 | from predictor import COCODemo 7 | 8 | import time 9 | 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo") 13 | parser.add_argument( 14 | "--config-file", 15 | default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml", 16 | metavar="FILE", 17 | help="path to config file", 18 | ) 19 | parser.add_argument( 20 | "--confidence-threshold", 21 | type=float, 22 | default=0.7, 23 | help="Minimum score for the prediction to be shown", 24 | ) 25 | parser.add_argument( 26 | "--min-image-size", 27 | type=int, 28 | default=224, 29 | help="Smallest size of the image to feed to the model. 
" 30 | "Model was trained with 800, which gives best results", 31 | ) 32 | parser.add_argument( 33 | "--show-mask-heatmaps", 34 | dest="show_mask_heatmaps", 35 | help="Show a heatmap probability for the top masks-per-dim masks", 36 | action="store_true", 37 | ) 38 | parser.add_argument( 39 | "--masks-per-dim", 40 | type=int, 41 | default=2, 42 | help="Number of heatmaps per dimension to show", 43 | ) 44 | parser.add_argument( 45 | "opts", 46 | help="Modify model config options using the command-line", 47 | default=None, 48 | nargs=argparse.REMAINDER, 49 | ) 50 | 51 | args = parser.parse_args() 52 | 53 | # load config from file and command-line arguments 54 | cfg.merge_from_file(args.config_file) 55 | cfg.merge_from_list(args.opts) 56 | cfg.freeze() 57 | 58 | # prepare object that handles inference plus adds predictions on top of image 59 | coco_demo = COCODemo( 60 | cfg, 61 | confidence_threshold=args.confidence_threshold, 62 | show_mask_heatmaps=args.show_mask_heatmaps, 63 | masks_per_dim=args.masks_per_dim, 64 | min_image_size=args.min_image_size, 65 | ) 66 | 67 | cam = cv2.VideoCapture(0) 68 | while True: 69 | start_time = time.time() 70 | ret_val, img = cam.read() 71 | composite = coco_demo.run_on_opencv_image(img) 72 | print("Time: {:.2f} s / img".format(time.time() - start_time)) 73 | cv2.imshow("COCO detections", composite) 74 | if cv2.waitKey(1) == 27: 75 | break # esc to quit 76 | cv2.destroyAllWindows() 77 | 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CUDA="9.0" 2 | ARG CUDNN="7" 3 | 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04 5 | 6 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 7 | 8 | # install basics 9 | RUN apt-get update -y \ 10 | && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++ \ 11 | && apt-get install -y libglib2.0-0 libsm6 libxext6 libxrender-dev 12 | 13 | # Install Miniconda 14 | RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 15 | && chmod +x /miniconda.sh \ 16 | && /miniconda.sh -b -p /miniconda \ 17 | && rm /miniconda.sh 18 | 19 | ENV PATH=/miniconda/bin:$PATH 20 | 21 | # Create a Python 3.6 environment 22 | RUN /miniconda/bin/conda install -y conda-build \ 23 | && /miniconda/bin/conda create -y --name py36 python=3.6.7 \ 24 | && /miniconda/bin/conda clean -ya 25 | 26 | ENV CONDA_DEFAULT_ENV=py36 27 | ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV 28 | ENV PATH=$CONDA_PREFIX/bin:$PATH 29 | ENV CONDA_AUTO_UPDATE_CONDA=false 30 | 31 | RUN conda install -y ipython 32 | RUN pip install ninja yacs cython matplotlib opencv-python 33 | 34 | # Install PyTorch 1.0 Nightly and OpenCV 35 | RUN conda install -y pytorch-nightly -c pytorch \ 36 | && conda clean -ya 37 | 38 | # Install TorchVision master 39 | RUN git clone https://github.com/pytorch/vision.git \ 40 | && cd vision \ 41 | && python setup.py install 42 | 43 | # install pycocotools 44 | RUN git clone https://github.com/cocodataset/cocoapi.git \ 45 | && cd cocoapi/PythonAPI \ 46 | && python setup.py build_ext install 47 | 48 | # install PyTorch Detection 49 | RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \ 50 | && cd maskrcnn-benchmark \ 51 | && python setup.py build develop 52 | 53 | WORKDIR /maskrcnn-benchmark 54 | 
-------------------------------------------------------------------------------- /docker/docker-jupyter/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CUDA="9.0" 2 | ARG CUDNN="7" 3 | 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04 5 | 6 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 7 | 8 | # install basics 9 | RUN apt-get update -y \ 10 | && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++ 11 | 12 | # Install Miniconda 13 | RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 14 | && chmod +x /miniconda.sh \ 15 | && /miniconda.sh -b -p /miniconda \ 16 | && rm /miniconda.sh 17 | 18 | ENV PATH=/miniconda/bin:$PATH 19 | 20 | # Create a Python 3.6 environment 21 | RUN /miniconda/bin/conda install -y conda-build \ 22 | && /miniconda/bin/conda create -y --name py36 python=3.6.7 \ 23 | && /miniconda/bin/conda clean -ya 24 | 25 | ENV CONDA_DEFAULT_ENV=py36 26 | ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV 27 | ENV PATH=$CONDA_PREFIX/bin:$PATH 28 | ENV CONDA_AUTO_UPDATE_CONDA=false 29 | 30 | RUN conda install -y ipython 31 | RUN pip install ninja yacs cython matplotlib jupyter 32 | 33 | # Install PyTorch 1.0 Nightly and OpenCV 34 | RUN conda install -y pytorch-nightly -c pytorch \ 35 | && conda install -y opencv -c menpo \ 36 | && conda clean -ya 37 | 38 | WORKDIR /root 39 | 40 | USER root 41 | 42 | RUN mkdir /notebooks 43 | 44 | WORKDIR /notebooks 45 | 46 | # Install TorchVision master 47 | RUN git clone https://github.com/pytorch/vision.git \ 48 | && cd vision \ 49 | && python setup.py install 50 | 51 | # install pycocotools 52 | RUN git clone https://github.com/cocodataset/cocoapi.git \ 53 | && cd cocoapi/PythonAPI \ 54 | && python setup.py build_ext install 55 | 56 | # install PyTorch Detection 57 | RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \ 58 | && cd maskrcnn-benchmark \ 59 | && python setup.py build develop 60 | 61 | RUN jupyter notebook --generate-config 62 | 63 | ENV CONFIG_PATH="/root/.jupyter/jupyter_notebook_config.py" 64 | 65 | COPY "jupyter_notebook_config.py" ${CONFIG_PATH} 66 | 67 | ENTRYPOINT ["sh", "-c", "jupyter notebook --allow-root -y --no-browser --ip=0.0.0.0 --config=${CONFIG_PATH}"] 68 | -------------------------------------------------------------------------------- /docker/docker-jupyter/jupyter_notebook_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from IPython.lib import passwd 3 | 4 | #c = c # pylint:disable=undefined-variable 5 | c = get_config() 6 | c.NotebookApp.ip = '0.0.0.0' 7 | c.NotebookApp.port = int(os.getenv('PORT', 8888)) 8 | c.NotebookApp.open_browser = False 9 | 10 | # sets a password if PASSWORD is set in the environment 11 | if 'PASSWORD' in os.environ: 12 | password = os.environ['PASSWORD'] 13 | if password: 14 | c.NotebookApp.password = passwd(password) 15 | else: 16 | c.NotebookApp.password = '' 17 | c.NotebookApp.token = '' 18 | del os.environ['PASSWORD'] 19 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .defaults import _C as cfg 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
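// Annotation (not in the original header): for ROIPool only the CUDA kernels
// exist; the dispatcher below raises "Not implemented on the CPU" when the
// input tensor is not on the GPU. The same pattern is used for RROIAlign next.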
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/RROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | std::tuple<at::Tensor, at::Tensor, at::Tensor> RROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return RROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | //return RROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor RROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const at::Tensor& con_idx_x, 30 | const at::Tensor& con_idx_y, 31 | const float spatial_scale, 32 | const int pooled_height, 33 | const int pooled_width, 34 | const int batch_size, 35 | const int channels, 36 | const int height, 37 | const int width) { 38 | if (grad.type().is_cuda()) { 39 | #ifdef WITH_CUDA 40 | return RROIAlign_backward_cuda(grad, rois, con_idx_x, con_idx_y, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 41 | #else 42 | AT_ERROR("Not compiled with GPU support"); 43 | #endif 44 | } 45 | AT_ERROR("Not implemented on the CPU"); 46 | } 47 | 48 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
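// Annotation (not in the original file): nms_cpu_kernel below is greedy NMS
// over score-sorted boxes, using the legacy Detectron "+1" convention when
// computing box areas and intersections (integer pixel coordinates).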
2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 14 | const at::Tensor& rois, 15 | const float spatial_scale, 16 | const int pooled_height, 17 | const int pooled_width, 18 | const int batch_size, 19 | const int channels, 20 | const int height, 21 | const int width, 22 | const int sampling_ratio); 23 | 24 | 25 | std::tuple<at::Tensor, at::Tensor, at::Tensor> RROIAlign_forward_cuda(const at::Tensor& input, 26 | const at::Tensor& rois, 27 | const float spatial_scale, 28 | const int pooled_height, 29 | const int pooled_width); 30 | 31 | at::Tensor RROIAlign_backward_cuda(const at::Tensor& grad, 32 | const at::Tensor& rois, 33 | const at::Tensor& con_idx_x, 34 | const at::Tensor& con_idx_y, 35 | const float spatial_scale, 36 | const int pooled_height, 37 | const int pooled_width, 38 | const int batch_size, 39 | const int channels, 40 | const int height, 41 | const int width); 42 | 43 | 44 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(const at::Tensor& input, 45 | const at::Tensor& rois, 46 | const float spatial_scale, 47 | const int pooled_height, 48 | const int pooled_width); 49 | 50 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 51 | const at::Tensor& input, 52 | const at::Tensor& rois, 53 | const at::Tensor& argmax, 54 | const float spatial_scale, 55 | const int pooled_height, 56 | const int pooled_width, 57 | const int batch_size, 58 | const int channels, 59 | const int height, 60 | const int width); 61 | 62 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 63 | 64 | 65 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 66 | const int height, 67 | const int width); 68 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
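// Annotation (not in the original file): the pybind11 bindings below are
// compiled into the `maskrcnn_benchmark._C` extension by setup.py; Python
// wrappers such as layers/nms.py and layers/roi_align.py import `_C` and
// call these functions directly.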
2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "RROIAlign.h" 6 | 7 | 8 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 9 | m.def("nms", &nms, "non-maximum suppression"); 10 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 11 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 12 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 13 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 14 | 15 | m.def("rroi_align_forward", &RROIAlign_forward, "RROIAlign_forward"); 16 | m.def("rroi_align_backward", &RROIAlign_backward, "RROIAlign_backward"); 17 | 18 | } 19 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/README.md: -------------------------------------------------------------------------------- 1 | # Setting Up Datasets 2 | This file describes how to perform training on other datasets. 3 | 4 | Only Pascal VOC dataset can be loaded from its original format and be outputted to Pascal style results currently. 5 | 6 | We expect the annotations from other datasets be converted to COCO json format, and 7 | the output will be in COCO-style. (i.e. AP, AP50, AP75, APs, APm, APl for bbox and segm) 8 | 9 | ## Creating Symlinks for PASCAL VOC 10 | 11 | We assume that your symlinked `datasets/voc/VOC` directory has the following structure: 12 | 13 | ``` 14 | VOC 15 | |_ JPEGImages 16 | | |_ .jpg 17 | | |_ ... 18 | | |_ .jpg 19 | |_ Annotations 20 | | |_ pascal_train.json (optional) 21 | | |_ pascal_val.json (optional) 22 | | |_ pascal_test.json (optional) 23 | | |_ .xml 24 | | |_ ... 25 | | |_ .xml 26 | |_ VOCdevkit 27 | ``` 28 | 29 | Create symlinks for `voc/VOC`: 30 | 31 | ``` 32 | cd ~/github/maskrcnn-benchmark 33 | mkdir -p datasets/voc/VOC 34 | ln -s /path/to/VOC /datasets/voc/VOC 35 | ``` 36 | Example configuration files for PASCAL VOC could be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/pascal_voc/). 37 | 38 | ### PASCAL VOC Annotations in COCO Format 39 | To output COCO-style evaluation result, PASCAL VOC annotations in COCO json format is required and could be downloaded from [here](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip) 40 | via http://cocodataset.org/#external. 41 | 42 | ## Creating Symlinks for Cityscapes: 43 | 44 | We assume that your symlinked `datasets/cityscapes` directory has the following structure: 45 | 46 | ``` 47 | cityscapes 48 | |_ images 49 | | |_ .jpg 50 | | |_ ... 51 | | |_ .jpg 52 | |_ annotations 53 | | |_ instanceonly_gtFile_train.json 54 | | |_ ... 55 | |_ raw 56 | |_ gtFine 57 | |_ ... 58 | |_ README.md 59 | ``` 60 | 61 | Create symlinks for `cityscapes`: 62 | 63 | ``` 64 | cd ~/github/maskrcnn-benchmark 65 | mkdir -p datasets/cityscapes 66 | ln -s /path/to/cityscapes datasets/data/cityscapes 67 | ``` 68 | 69 | ### Steps to convert Cityscapes Annotations to COCO Format 70 | 1. Download gtFine_trainvaltest.zip from https://www.cityscapes-dataset.com/downloads/ (login required) 71 | 2. Extract it to /path/to/gtFine_trainvaltest 72 | ``` 73 | gtFine_trainvaltest 74 | |_ gtFine 75 | ``` 76 | 3. 
Run the commands below to convert the annotations 77 | 78 | ``` 79 | cd ~/github 80 | git clone https://github.com/mcordts/cityscapesScripts.git 81 | cd cityscapesScripts 82 | cp ~/github/maskrcnn-benchmark/tools/cityscapes/instances2dict_with_polygons.py cityscapesscripts/evaluation 83 | python setup.py install 84 | cd ~/github/maskrcnn-benchmark 85 | python tools/cityscapes/convert_cityscapes_to_coco.py --datadir /path/to/gtFine_trainvaltest --outdir /path/to/cityscapes/annotations 86 | ``` 87 | 88 | Example configuration files for Cityscapes can be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/cityscapes/). 89 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_data_loader 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.structures.image_list import to_image_list 3 | 4 | 5 | class BatchCollator(object): 6 | """ 7 | From a list of samples from the dataset, 8 | returns the batched images and targets. 9 | This should be passed to the DataLoader 10 | """ 11 | 12 | def __init__(self, size_divisible=0): 13 | self.size_divisible = size_divisible 14 | 15 | def __call__(self, batch): 16 | transposed_batch = list(zip(*batch)) 17 | images = to_image_list(transposed_batch[0], self.size_divisible) 18 | targets = transposed_batch[1] 19 | img_ids = transposed_batch[2] 20 | return images, targets, img_ids 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .coco import COCODataset 3 | from .voc import PascalVOCDataset 4 | from .concat_dataset import ConcatDataset 5 | from .icdar_series import ICDAR2013Dataset 6 | from .rotation_series import RotationDataset 7 | from .rrpn_e2e_series import SpottingDataset 8 | from .rotation_mask_datasets import RotationMaskDataset 9 | 10 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset", 'ICDAR2013Dataset', 'RotationDataset', 'SpottingDataset', 'RotationMaskDataset'] 11 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
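# Annotation (not in the original file): besides dropping images without
# annotations, COCODataset below also removes images whose non-crowd boxes
# are all degenerate (width or height <= 1 pixel) -- see the ids_to_remove
# loop in __init__.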
2 | import torch 3 | import torchvision 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask 7 | 8 | 9 | class COCODataset(torchvision.datasets.coco.CocoDetection): 10 | def __init__( 11 | self, ann_file, root, remove_images_without_annotations, transforms=None 12 | ): 13 | super(COCODataset, self).__init__(root, ann_file) 14 | # sort indices for reproducible results 15 | self.ids = sorted(self.ids) 16 | 17 | # filter images without detection annotations 18 | if remove_images_without_annotations: 19 | self.ids = [ 20 | img_id 21 | for img_id in self.ids 22 | if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 23 | ] 24 | 25 | ids_to_remove = [] 26 | for img_id in self.ids: 27 | ann_ids = self.coco.getAnnIds(imgIds=img_id) 28 | anno = self.coco.loadAnns(ann_ids) 29 | if all( 30 | any(o <= 1 for o in obj["bbox"][2:]) 31 | for obj in anno 32 | if obj["iscrowd"] == 0 33 | ): 34 | ids_to_remove.append(img_id) 35 | 36 | self.ids = [ 37 | img_id for img_id in self.ids if img_id not in ids_to_remove 38 | ] 39 | 40 | self.json_category_id_to_contiguous_id = { 41 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 42 | } 43 | self.contiguous_category_id_to_json_id = { 44 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 45 | } 46 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 47 | self.transforms = transforms 48 | 49 | def __getitem__(self, idx): 50 | img, anno = super(COCODataset, self).__getitem__(idx) 51 | 52 | # filter crowd annotations 53 | # TODO might be better to add an extra field 54 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 55 | 56 | boxes = [obj["bbox"] for obj in anno] 57 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes 58 | target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") 59 | 60 | classes = [obj["category_id"] for obj in anno] 61 | classes = [self.json_category_id_to_contiguous_id[c] for c in classes] 62 | classes = torch.tensor(classes) 63 | target.add_field("labels", classes) 64 | 65 | masks = [obj["segmentation"] for obj in anno] 66 | masks = SegmentationMask(masks, img.size) 67 | target.add_field("masks", masks) 68 | 69 | target = target.clip_to_image(remove_empty=True) 70 | 71 | if self.transforms is not None: 72 | img, target = self.transforms(img, target) 73 | 74 | return img, target, idx 75 | 76 | def get_img_info(self, index): 77 | img_id = self.id_to_img_map[index] 78 | img_data = self.coco.imgs[img_id] 79 | return img_data 80 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
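# Annotation (not in the original file): get_idxs maps a global sample index
# to a (dataset_idx, sample_idx) pair by bisecting cumulative_sizes, so
# get_img_info can be answered by the right constituent dataset without
# touching the others.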
2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the sizes of the image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.data import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .voc import voc_evaluation 5 | 6 | 7 | def evaluate(dataset, predictions, output_folder, **kwargs): 8 | """evaluate dataset using different methods based on dataset type. 9 | Args: 10 | dataset: Dataset object 11 | predictions(list[BoxList]): each item in the list represents the 12 | prediction results for one image. 13 | output_folder: output folder, to save evaluation files or results. 14 | **kwargs: other args. 15 | Returns: 16 | evaluation result 17 | """ 18 | args = dict( 19 | dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs 20 | ) 21 | if isinstance(dataset, datasets.COCODataset): 22 | return coco_evaluation(**args) 23 | elif isinstance(dataset, datasets.PascalVOCDataset): 24 | return voc_evaluation(**args) 25 | else: 26 | dataset_name = dataset.__class__.__name__ 27 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 28 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation 2 | 3 | 4 | def coco_evaluation( 5 | dataset, 6 | predictions, 7 | output_folder, 8 | box_only, 9 | iou_types, 10 | expected_results, 11 | expected_results_sigma_tol, 12 | ): 13 | return do_coco_evaluation( 14 | dataset=dataset, 15 | predictions=predictions, 16 | box_only=box_only, 17 | output_folder=output_folder, 18 | iou_types=iou_types, 19 | expected_results=expected_results, 20 | expected_results_sigma_tol=expected_results_sigma_tol, 21 | ) 22 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .voc_eval import do_voc_evaluation 4 | 5 | 6 | def voc_evaluation(dataset, predictions, output_folder, box_only, **_): 7 | logger = logging.getLogger("maskrcnn_benchmark.inference") 8 | if box_only: 9 | logger.warning("voc evaluation doesn't support box_only, ignored.") 10 | logger.info("performing voc evaluation, ignored iou_types.") 11 | return do_voc_evaluation( 12 | dataset=dataset, 13 | predictions=predictions, 14 | output_folder=output_folder, 15 | logger=logger, 16 | ) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/list_dataset.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Simple dataset class that wraps a list of path names 4 | """ 5 | 6 | from PIL import Image 7 | 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList 9 | 10 | 11 | class ListDataset(object): 12 | def __init__(self, image_lists, transforms=None): 13 | self.image_lists = image_lists 14 | self.transforms = transforms 15 | 16 | def __getitem__(self, item): 17 | img = Image.open(self.image_lists[item]).convert("RGB") 18 | 19 | # dummy target 20 | w, h = img.size 21 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 22 | 23 | if self.transforms is not None: 24 | img, target = self.transforms(img, target) 25 | 26 | return img, target 27 | 28 | def __len__(self): 29 | return len(self.image_lists) 30 | 31 | def get_img_info(self, item): 32 | """ 33 | Return the image dimensions for the image, without 34 | loading and pre-processing it 35 | """ 36 | pass 37 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | It is especially useful in conjunction with 13 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 14 | process can pass a DistributedSampler instance as a DataLoader sampler, 15 | and load a subset of the original dataset that is exclusive to it. 16 | .. note:: 17 | Dataset is assumed to be of constant size. 18 | Arguments: 19 | dataset: Dataset used for sampling. 20 | num_replicas (optional): Number of processes participating in 21 | distributed training. 22 | rank (optional): Rank of the current process within num_replicas. 
23 | """ 24 | 25 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 26 | if num_replicas is None: 27 | if not dist.is_available(): 28 | raise RuntimeError("Requires distributed package to be available") 29 | num_replicas = dist.get_world_size() 30 | if rank is None: 31 | if not dist.is_available(): 32 | raise RuntimeError("Requires distributed package to be available") 33 | rank = dist.get_rank() 34 | self.dataset = dataset 35 | self.num_replicas = num_replicas 36 | self.rank = rank 37 | self.epoch = 0 38 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 39 | self.total_size = self.num_samples * self.num_replicas 40 | self.shuffle = True 41 | 42 | def __iter__(self): 43 | if self.shuffle: 44 | # deterministically shuffle based on epoch 45 | g = torch.Generator() 46 | g.manual_seed(self.epoch) 47 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 48 | else: 49 | indices = torch.arange(len(self.dataset)).tolist() 50 | 51 | # add extra samples to make it evenly divisible 52 | indices += indices[: (self.total_size - len(indices))] 53 | assert len(indices) == self.total_size 54 | 55 | # subsample 56 | offset = self.num_samples * self.rank 57 | indices = indices[offset : offset + self.num_samples] 58 | assert len(indices) == self.num_samples 59 | 60 | return iter(indices) 61 | 62 | def __len__(self): 63 | return self.num_samples 64 | 65 | def set_epoch(self, epoch): 66 | self.epoch = epoch 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | flip_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 10 | else: 11 | min_size = cfg.INPUT.MIN_SIZE_TEST 12 | max_size = cfg.INPUT.MAX_SIZE_TEST 13 | flip_prob = 0 14 | 15 | to_bgr255 = cfg.INPUT.TO_BGR255 16 | normalize_transform = T.Normalize( 17 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 18 | ) 19 | 20 | _aug_list = { 21 | "RRPN":T.Compose( 22 | [ 23 | T.Resize(min_size, max_size), 24 | T.RandomRotation(prob=1.0, r_range=cfg.INPUT.ROTATION_RANGE, fixed_angle=-1, gt_margin=cfg.MODEL.RRPN.GT_BOX_MARGIN), 25 | T.ToTensor(), 26 | # T.MixUp(mix_ratio=0.1), 27 | normalize_transform, 28 | ] 29 | ), 30 | "GeneralizedRCNN":T.Compose( 31 | [ 32 | T.Resize(min_size, max_size), 33 | T.RandomHorizontalFlip(flip_prob), 34 | T.ToTensor(), 35 | normalize_transform, 36 | ] 37 | ) 38 | } 39 | ''' 40 | if cfg.MODEL.META_ARCHITECTURE == "RRPN": 41 | transform = T.Compose( 42 | [ 43 | T.Resize(min_size, max_size), 44 | T.RandomRotation(prob=0, fixed_angle=30), 45 | T.ToTensor(), 46 | normalize_transform, 47 | ] 48 | ) 49 | else: 50 | transform = T.Compose( 51 | [ 52 | T.Resize(min_size, max_size), 53 | T.RandomHorizontalFlip(flip_prob), 54 | T.ToTensor(), 55 | normalize_transform, 56 | ] 57 | ) 58 | ''' 59 | return _aug_list[cfg.MODEL.META_ARCHITECTURE] 60 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d 5 | from .misc import Conv2d 6 | from .misc import ConvTranspose2d 7 | from .misc import interpolate 8 | from .nms import nms 9 | from .roi_align import ROIAlign 10 | from .rroi_align import RROIAlign 11 | from .roi_align import roi_align 12 | from .rroi_align import rroi_align 13 | from .roi_pool import ROIPool 14 | from .roi_pool import roi_pool 15 | from .smooth_l1_loss import smooth_l1_loss 16 | 17 | __all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", 18 | "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate", 19 | "FrozenBatchNorm2d", "RROIAlign" 20 | ] 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
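# Annotation (not in the original file): this module JIT-compiles the csrc/
# sources with torch.utils.cpp_extension.load (registering them under the
# name "torchvision") as an alternative to the ahead-of-time build in
# setup.py; -DWITH_CUDA is added only when CUDA is actually available.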
2 | import glob 3 | import os.path 4 | 5 | import torch 6 | 7 | try: 8 | from torch.utils.cpp_extension import load as load_ext 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | except ImportError: 11 | raise ImportError("The cpp layer extensions require PyTorch 0.4 or higher") 12 | 13 | 14 | def _load_C_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | this_dir = os.path.dirname(this_dir) 17 | this_dir = os.path.join(this_dir, "csrc") 18 | 19 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 22 | 23 | source = main_file + source_cpu 24 | 25 | extra_cflags = [] 26 | if torch.cuda.is_available() and CUDA_HOME is not None: 27 | source.extend(source_cuda) 28 | extra_cflags = ["-DWITH_CUDA"] 29 | source = [os.path.join(this_dir, s) for s in source] 30 | extra_include_paths = [this_dir] 31 | return load_ext( 32 | "torchvision", 33 | source, 34 | extra_cflags=extra_cflags, 35 | extra_include_paths=extra_include_paths, 36 | ) 37 | 38 | 39 | _C = _load_C_extensions() 40 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/batch_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class FrozenBatchNorm2d(nn.Module): 7 | """ 8 | BatchNorm2d where the batch statistics and the affine parameters 9 | are fixed 10 | """ 11 | 12 | def __init__(self, n): 13 | super(FrozenBatchNorm2d, self).__init__() 14 | self.register_buffer("weight", torch.ones(n)) 15 | self.register_buffer("bias", torch.zeros(n)) 16 | self.register_buffer("running_mean", torch.zeros(n)) 17 | self.register_buffer("running_var", torch.ones(n)) 18 | 19 | def forward(self, x): 20 | scale = self.weight * self.running_var.rsqrt() 21 | bias = self.bias - self.running_mean * scale 22 | scale = scale.reshape(1, -1, 1, 1) 23 | bias = bias.reshape(1, -1, 1, 1) 24 | return x * scale + bias 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | helper classes that support empty tensors on some nn functions. 4 | 5 | Ideally, add support directly in PyTorch to empty tensors in 6 | those functions.
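For instance, the Conv2d wrapper defined below degrades gracefully on an
empty batch (a minimal sketch; the shapes are illustrative only):

    conv = Conv2d(256, 256, kernel_size=3, padding=1)
    x = torch.empty(0, 256, 14, 14)
    conv(x).shape  # torch.Size([0, 256, 14, 14]); no convolution kernel is run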
7 | 8 | This can be removed once https://github.com/pytorch/pytorch/issues/12013 9 | is implemented 10 | """ 11 | 12 | import math 13 | import torch 14 | from torch.nn.modules.utils import _ntuple 15 | 16 | 17 | class _NewEmptyTensorOp(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, new_shape): 20 | ctx.shape = x.shape 21 | return x.new_empty(new_shape) 22 | 23 | @staticmethod 24 | def backward(ctx, grad): 25 | shape = ctx.shape 26 | return _NewEmptyTensorOp.apply(grad, shape), None 27 | 28 | 29 | 30 | class Conv2d(torch.nn.Conv2d): 31 | def forward(self, x): 32 | if x.numel() > 0: 33 | return super(Conv2d, self).forward(x) 34 | # get output shape 35 | 36 | output_shape = [ 37 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 38 | for i, p, di, k, d in zip( 39 | x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride 40 | ) 41 | ] 42 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape 43 | return _NewEmptyTensorOp.apply(x, output_shape) 44 | 45 | 46 | class ConvTranspose2d(torch.nn.ConvTranspose2d): 47 | def forward(self, x): 48 | if x.numel() > 0: 49 | return super(ConvTranspose2d, self).forward(x) 50 | # get output shape 51 | 52 | output_shape = [ 53 | (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op 54 | for i, p, di, k, d, op in zip( 55 | x.shape[-2:], 56 | self.padding, 57 | self.dilation, 58 | self.kernel_size, 59 | self.stride, 60 | self.output_padding, 61 | ) 62 | ] 63 | output_shape = [x.shape[0], self.bias.shape[0]] + output_shape 64 | return _NewEmptyTensorOp.apply(x, output_shape) 65 | 66 | 67 | def interpolate( 68 | input, size=None, scale_factor=None, mode="nearest", align_corners=None 69 | ): 70 | if input.numel() > 0: 71 | return torch.nn.functional.interpolate( 72 | input, size, scale_factor, mode, align_corners 73 | ) 74 | 75 | def _check_size_scale_factor(dim): 76 | if size is None and scale_factor is None: 77 | raise ValueError("either size or scale_factor should be defined") 78 | if size is not None and scale_factor is not None: 79 | raise ValueError("only one of size or scale_factor should be defined") 80 | if ( 81 | scale_factor is not None 82 | and isinstance(scale_factor, tuple) 83 | and len(scale_factor) != dim 84 | ): 85 | raise ValueError( 86 | "scale_factor shape must match input shape. " 87 | "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) 88 | ) 89 | 90 | def _output_size(dim): 91 | _check_size_scale_factor(dim) 92 | if size is not None: 93 | return size 94 | scale_factors = _ntuple(dim)(scale_factor) 95 | # math.floor might return float in py2.7 96 | return [ 97 | int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) 98 | ] 99 | 100 | output_shape = tuple(_output_size(2)) 101 | output_shape = input.shape[:-2] + output_shape 102 | return _NewEmptyTensorOp.apply(input, output_shape) 103 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from maskrcnn_benchmark import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIAlign(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None 45 | 46 | 47 | roi_align = _ROIAlign.apply 48 | 49 | 50 | class ROIAlign(nn.Module): 51 | def __init__(self, output_size, spatial_scale, sampling_ratio): 52 | super(ROIAlign, self).__init__() 53 | self.output_size = output_size 54 | self.spatial_scale = spatial_scale 55 | self.sampling_ratio = sampling_ratio 56 | 57 | def forward(self, input, rois): 58 | return roi_align( 59 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 60 | ) 61 | 62 | def __repr__(self): 63 | tmpstr = self.__class__.__name__ + "(" 64 | tmpstr += "output_size=" + str(self.output_size) 65 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 66 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 67 | tmpstr += ")" 68 | return tmpstr 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
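# Example: a small usage sketch for the ROIAlign module defined in
# roi_align.py above; the shapes and the stride-16 scale are illustrative,
# and rois follow the 5-column [batch_index, x1, y1, x2, y2] layout expected
# by the C++/CUDA kernels.
#
#     pooler = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=2)
#     features = torch.randn(1, 256, 50, 50, device="cuda")
#     rois = torch.tensor([[0., 16., 16., 160., 160.]], device="cuda")
#     out = pooler(features, rois)   # -> torch.Size([1, 256, 7, 7])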
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/rroi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
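# Example: ROIPool (defined in roi_pool.py above) shares ROIAlign's
# interface but max-pools each bin and caches the argmax for the backward
# pass; a sketch with illustrative shapes.
#
#     pooler = ROIPool(output_size=(7, 7), spatial_scale=1.0 / 16)
#     features = torch.randn(1, 256, 50, 50, device="cuda")
#     rois = torch.tensor([[0., 0., 0., 112., 112.]], device="cuda")
#     out = pooler(features, rois)   # -> torch.Size([1, 256, 7, 7])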
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # TODO maybe push this to nn? 6 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 7 | """ 8 | very similar to the smooth_l1_loss from pytorch, but with 9 | the extra beta parameter 10 | """ 11 | n = torch.abs(input - target) 12 | cond = n < beta 13 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 14 | if size_average: 15 | return loss.mean() 16 | return loss.sum() 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/arpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
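# Worked check for smooth_l1_loss above (beta = 1/9): errors below beta take
# the quadratic branch 0.5 * n**2 / beta, errors at or above it the linear
# branch n - 0.5 * beta. The numbers are illustrative.
#
#     input = torch.tensor([0.0, 1.0])
#     target = torch.tensor([0.05, 0.0])
#     # n = |input - target| = [0.05, 1.0]; beta ~= 0.111
#     # 0.05 <  beta -> 0.5 * 0.05**2 / beta ~= 0.0113
#     # 1.00 >= beta -> 1.0 - 0.5 * beta    ~= 0.9444
#     smooth_l1_loss(input, target, size_average=False)   # ~= 0.9557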
2 | from .backbone import build_backbone 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | 4 | from torch import nn 5 | 6 | from maskrcnn_benchmark.modeling import registry 7 | from maskrcnn_benchmark.modeling.make_layers import conv_with_kaiming_uniform 8 | from . import fpn as fpn_module 9 | from . import resnet 10 | 11 | 12 | @registry.BACKBONES.register("R-50-C4") 13 | @registry.BACKBONES.register("R-50-C5") 14 | @registry.BACKBONES.register("R-50-FP4P") 15 | @registry.BACKBONES.register("R-101-C4") 16 | @registry.BACKBONES.register("R-101-C5") 17 | def build_resnet_backbone(cfg): 18 | body = resnet.ResNet(cfg) 19 | model = nn.Sequential(OrderedDict([("body", body)])) 20 | return model 21 | 22 | 23 | @registry.BACKBONES.register("R-50-FPN") 24 | @registry.BACKBONES.register("R-101-FPN") 25 | def build_resnet_fpn_backbone(cfg): 26 | body = resnet.ResNet(cfg) 27 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 28 | out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS 29 | fpn = fpn_module.FPN( 30 | in_channels_list=[ 31 | in_channels_stage2, 32 | in_channels_stage2 * 2, 33 | in_channels_stage2 * 4, 34 | in_channels_stage2 * 8, 35 | ], 36 | out_channels=out_channels, 37 | conv_block=conv_with_kaiming_uniform( 38 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 39 | ), 40 | top_blocks=fpn_module.LastLevelMaxPool(), 41 | ) 42 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 43 | return model 44 | 45 | 46 | def build_backbone(cfg): 47 | assert cfg.MODEL.BACKBONE.CONV_BODY in registry.BACKBONES, \ 48 | "cfg.MODEL.BACKBONE.CONV_BODY: {} is not registered in registry".format( 49 | cfg.MODEL.BACKBONE.CONV_BODY 50 | ) 51 | return registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY](cfg) 52 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class BalancedPositiveNegativeSampler(object): 6 | """ 7 | This class samples batches, ensuring that they contain a fixed proportion of positives 8 | """ 9 | 10 | def __init__(self, batch_size_per_image, positive_fraction): 11 | """ 12 | Arguments: 13 | batch_size_per_image (int): number of elements to be selected per image 14 | positive_fraction (float): percentage of positive elements per batch 15 | """ 16 | self.batch_size_per_image = batch_size_per_image 17 | self.positive_fraction = positive_fraction 18 | 19 | def __call__(self, matched_idxs): 20 | """ 21 | Arguments: 22 | matched idxs: list of tensors containing -1, 0 or positive values. 23 | Each tensor corresponds to a specific image. 24 | -1 values are ignored, 0 are considered as negatives and > 0 as 25 | positives. 26 | 27 | Returns: 28 | pos_idx (list[tensor]) 29 | neg_idx (list[tensor]) 30 | 31 | Returns two lists of binary masks for each image. 32 | The first list contains the positive elements that were selected, 33 | and the second list the negative examples.
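Example (a small, fully determined case)::

    sampler = BalancedPositiveNegativeSampler(
        batch_size_per_image=4, positive_fraction=0.5
    )
    pos_masks, neg_masks = sampler([torch.tensor([-1, 0, 2, 1, 0])])
    # num_pos = 2 -> both positives (indices 2 and 3) are kept;
    # num_neg = 2 -> both zeros (indices 1 and 4) are kept;
    # pos_masks[0] == tensor([0, 0, 1, 1, 0], dtype=torch.uint8)
    # neg_masks[0] == tensor([0, 1, 0, 0, 1], dtype=torch.uint8)
    # the -1 entry is never selected.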
34 | """ 35 | pos_idx = [] 36 | neg_idx = [] 37 | for matched_idxs_per_image in matched_idxs: 38 | 39 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 40 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 41 | 42 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 43 | # protect against not enough positive examples 44 | num_pos = min(positive.numel(), num_pos) 45 | num_neg = self.batch_size_per_image - num_pos 46 | # protect against not enough negative examples 47 | num_neg = min(negative.numel(), num_neg) 48 | 49 | #print('matched_idxs_per_image:', matched_idxs_per_image) 50 | #print('positive:', positive, 'negative', negative) 51 | 52 | # randomly select positive and negative examples 53 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 54 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 55 | 56 | pos_idx_per_image = positive[perm1] 57 | neg_idx_per_image = negative[perm2] 58 | 59 | # create binary mask from indices 60 | pos_idx_per_image_mask = torch.zeros_like( 61 | matched_idxs_per_image, dtype=torch.uint8 62 | ) 63 | neg_idx_per_image_mask = torch.zeros_like( 64 | matched_idxs_per_image, dtype=torch.uint8 65 | ) 66 | pos_idx_per_image_mask[pos_idx_per_image] = 1 67 | neg_idx_per_image_mask[neg_idx_per_image] = 1 68 | 69 | pos_idx.append(pos_idx_per_image_mask) 70 | neg_idx.append(neg_idx_per_image_mask) 71 | 72 | return pos_idx, neg_idx 73 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class BoxCoder(object): 8 | """ 9 | This class encodes and decodes a set of bounding boxes into 10 | the representation used for training the regressors. 11 | """ 12 | 13 | def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)): 14 | """ 15 | Arguments: 16 | weights (4-element tuple) 17 | bbox_xform_clip (float) 18 | """ 19 | self.weights = weights 20 | self.bbox_xform_clip = bbox_xform_clip 21 | 22 | def encode(self, reference_boxes, proposals): 23 | """ 24 | Encode a set of proposals with respect to some 25 | reference boxes 26 | 27 | Arguments: 28 | reference_boxes (Tensor): reference boxes 29 | proposals (Tensor): boxes to be encoded 30 | """ 31 | 32 | TO_REMOVE = 1 # TODO remove 33 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 34 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 35 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 36 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 37 | 38 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 39 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 40 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 41 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 42 | 43 | wx, wy, ww, wh = self.weights 44 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 45 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 46 | targets_dw = ww * torch.log(gt_widths / ex_widths) 47 | targets_dh = wh * torch.log(gt_heights / ex_heights) 48 | 49 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 50 | return targets 51 | 52 | def decode(self, rel_codes, boxes): 53 | """ 54 | From a set of original boxes and encoded relative box offsets, 55 | get the decoded boxes. 
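A round-trip sketch; the (10., 10., 5., 5.) weights are an assumption
borrowed from common Faster R-CNN head settings, not a value read from
this file::

    coder = BoxCoder(weights=(10., 10., 5., 5.))
    deltas = coder.encode(gt_boxes, proposals)   # both (N, 4) xyxy tensors
    restored = coder.decode(deltas, proposals)   # recovers gt_boxes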
56 | 57 | Arguments: 58 | rel_codes (Tensor): encoded boxes 59 | boxes (Tensor): reference boxes. 60 | """ 61 | 62 | boxes = boxes.to(rel_codes.dtype) 63 | 64 | TO_REMOVE = 1 # TODO remove 65 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 66 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 67 | ctr_x = boxes[:, 0] + 0.5 * widths 68 | ctr_y = boxes[:, 1] + 0.5 * heights 69 | 70 | wx, wy, ww, wh = self.weights 71 | dx = rel_codes[:, 0::4] / wx 72 | dy = rel_codes[:, 1::4] / wy 73 | dw = rel_codes[:, 2::4] / ww 74 | dh = rel_codes[:, 3::4] / wh 75 | 76 | # Prevent sending too large values into torch.exp() 77 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 78 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 79 | 80 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 81 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 82 | pred_w = torch.exp(dw) * widths[:, None] 83 | pred_h = torch.exp(dh) * heights[:, None] 84 | 85 | pred_boxes = torch.zeros_like(rel_codes) 86 | # x1 87 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 88 | # y1 89 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 90 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 91 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 92 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 93 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 94 | 95 | return pred_boxes 96 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .detectors import build_detection_model 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/detectors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .generalized_rcnn import GeneralizedRCNN 3 | from .generalized_rrpn_rcnn import GeneralizedRRPNRCNN 4 | 5 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN, 6 | "RRPN":GeneralizedRRPNRCNN} 7 | 8 | 9 | def build_detection_model(cfg): 10 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 11 | return meta_arch(cfg) 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/generalized_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Implements the Generalized R-CNN framework 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | 9 | from maskrcnn_benchmark.structures.image_list import to_image_list 10 | 11 | from ..backbone import build_backbone 12 | from ..rpn.rpn import build_rpn 13 | from ..roi_heads.roi_heads import build_roi_heads 14 | 15 | 16 | class GeneralizedRCNN(nn.Module): 17 | """ 18 | Main class for Generalized R-CNN. Currently supports boxes and masks. 19 | It consists of three main parts: 20 | - backbone 21 | - rpn 22 | - heads: takes the features + the proposals from the RPN and computes 23 | detections / masks from it. 
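A minimal usage sketch (cfg is assumed to be a fully merged config node)::

    model = build_detection_model(cfg)   # from detectors.py above
    losses = model(images, targets)      # training mode: dict of losses
    model.eval()
    with torch.no_grad():
        detections = model(images)       # inference: list[BoxList]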
24 | """ 25 | 26 | def __init__(self, cfg): 27 | super(GeneralizedRCNN, self).__init__() 28 | 29 | self.backbone = build_backbone(cfg) 30 | self.rpn = build_rpn(cfg) 31 | self.roi_heads = build_roi_heads(cfg) 32 | 33 | def forward(self, images, targets=None): 34 | """ 35 | Arguments: 36 | images (list[Tensor] or ImageList): images to be processed 37 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 38 | 39 | Returns: 40 | result (list[BoxList] or dict[Tensor]): the output from the model. 41 | During training, it returns a dict[Tensor] which contains the losses. 42 | During testing, it returns list[BoxList] containing additional fields 43 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 44 | 45 | """ 46 | if self.training and targets is None: 47 | raise ValueError("In training mode, targets should be passed") 48 | images = to_image_list(images) 49 | features = self.backbone(images.tensors) 50 | proposals, proposal_losses = self.rpn(images, features, targets) 51 | if self.roi_heads: 52 | x, result, detector_losses = self.roi_heads(features, proposals, targets) 53 | else: 54 | # RPN-only models don't have roi_heads 55 | x = features 56 | result = proposals 57 | detector_losses = {} 58 | 59 | if self.training: 60 | losses = {} 61 | losses.update(detector_losses) 62 | losses.update(proposal_losses) 63 | return losses 64 | 65 | return result 66 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/generalized_rrpn_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Implements the Generalized R-CNN framework 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | 9 | from maskrcnn_benchmark.structures.image_list import to_image_list 10 | 11 | from ..backbone import build_backbone 12 | from ..rrpn.rrpn import build_rpn 13 | from ..roi_heads.rroi_heads import build_roi_heads 14 | 15 | 16 | class GeneralizedRRPNRCNN(nn.Module): 17 | """ 18 | Main class for Generalized R-CNN. Currently supports boxes and masks. 19 | It consists of three main parts: 20 | - backbone 21 | - rpn 22 | - heads: takes the features + the proposals from the RPN and computes 23 | detections / masks from it. 24 | """ 25 | 26 | def __init__(self, cfg): 27 | super(GeneralizedRRPNRCNN, self).__init__() 28 | 29 | self.backbone = build_backbone(cfg) 30 | self.rpn = build_rpn(cfg) 31 | self.roi_heads = build_roi_heads(cfg) 32 | self.fp4p_on = cfg.MODEL.FP4P_ON 33 | 34 | def forward(self, images, targets=None): 35 | """ 36 | Arguments: 37 | images (list[Tensor] or ImageList): images to be processed 38 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 39 | 40 | Returns: 41 | result (list[BoxList] or dict[Tensor]): the output from the model. 42 | During training, it returns a dict[Tensor] which contains the losses. 43 | During testing, it returns list[BoxList] containing additional fields 44 | like `scores`, `labels` and `mask` (for Mask R-CNN models).
45 | 46 | """ 47 | if self.training and targets is None: 48 | raise ValueError("In training mode, targets should be passed") 49 | images = to_image_list(images) 50 | features = self.backbone(images.tensors) 51 | 52 | if self.fp4p_on: 53 | # get you C4 54 | proposals, proposal_losses = self.rpn(images, (features[-1],), targets) 55 | else: 56 | proposals, proposal_losses = self.rpn(images, features, targets) 57 | 58 | # features = [feature.detach() for feature in features] 59 | 60 | if self.roi_heads: 61 | x, result, detector_losses = self.roi_heads(features, proposals, targets) 62 | else: 63 | # RPN-only models don't have roi_heads 64 | x = features 65 | result = proposals 66 | detector_losses = {} 67 | 68 | if self.training: 69 | losses = {} 70 | losses.update(detector_losses) 71 | losses.update(proposal_losses) 72 | return losses 73 | 74 | return result 75 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from maskrcnn_benchmark.utils.registry import Registry 4 | 5 | BACKBONES = Registry() 6 | ROI_BOX_FEATURE_EXTRACTORS = Registry() 7 | RPN_HEADS = Registry() 8 | RROI_BOX_FEATURE_EXTRACTORS = Registry() -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/roi_heads/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 6 | from .roi_box_predictors import make_roi_box_predictor 7 | from .inference import make_roi_box_post_processor 8 | from .loss import make_roi_box_loss_evaluator 9 | 10 | 11 | class ROIBoxHead(torch.nn.Module): 12 | """ 13 | Generic Box Head class. 14 | """ 15 | 16 | def __init__(self, cfg): 17 | super(ROIBoxHead, self).__init__() 18 | self.feature_extractor = make_roi_box_feature_extractor(cfg) 19 | self.predictor = make_roi_box_predictor(cfg) 20 | self.post_processor = make_roi_box_post_processor(cfg) 21 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 22 | 23 | def forward(self, features, proposals, targets=None): 24 | """ 25 | Arguments: 26 | features (list[Tensor]): feature-maps from possibly several levels 27 | proposals (list[BoxList]): proposal boxes 28 | targets (list[BoxList], optional): the ground-truth targets. 29 | 30 | Returns: 31 | x (Tensor): the result of the feature extractor 32 | proposals (list[BoxList]): during training, the subsampled proposals 33 | are returned. 
During testing, the predicted boxlists are returned 34 | losses (dict[Tensor]): During training, returns the losses for the 35 | head. During testing, returns an empty dict. 36 | """ 37 | 38 | if self.training: 39 | # Faster R-CNN subsamples during training the proposals with a fixed 40 | # positive / negative ratio 41 | with torch.no_grad(): 42 | proposals = self.loss_evaluator.subsample(proposals, targets) 43 | 44 | # extract features that will be fed to the final classifier. The 45 | # feature_extractor generally corresponds to the pooler + heads 46 | x = self.feature_extractor(features, proposals) 47 | # final classifier that converts the features into predictions 48 | class_logits, box_regression = self.predictor(x) 49 | 50 | if not self.training: 51 | result = self.post_processor((class_logits, box_regression), proposals) 52 | return x, result, {} 53 | 54 | loss_classifier, loss_box_reg = self.loss_evaluator( 55 | [class_logits], [box_regression] 56 | ) 57 | return ( 58 | x, 59 | proposals, 60 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 61 | ) 62 | 63 | 64 | def build_roi_box_head(cfg): 65 | """ 66 | Constructs a new box head. 67 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 68 | and make it a parameter in the config 69 | """ 70 | return ROIBoxHead(cfg) 71 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | 4 | 5 | class FastRCNNPredictor(nn.Module): 6 | def __init__(self, config, pretrained=None): 7 | super(FastRCNNPredictor, self).__init__() 8 | 9 | stage_index = 4 10 | stage2_relative_factor = 2 ** (stage_index - 1) 11 | res2_out_channels = config.MODEL.RESNETS.RES2_OUT_CHANNELS 12 | num_inputs = res2_out_channels * stage2_relative_factor 13 | 14 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7) 16 | self.cls_score = nn.Linear(num_inputs, num_classes) 17 | self.bbox_pred = nn.Linear(num_inputs, num_classes * 4) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | cls_logit = self.cls_score(x) 29 | bbox_pred = self.bbox_pred(x) 30 | return cls_logit, bbox_pred 31 | 32 | 33 | class FPNPredictor(nn.Module): 34 | def __init__(self, cfg): 35 | super(FPNPredictor, self).__init__() 36 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 37 | representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM 38 | 39 | self.cls_score = nn.Linear(representation_size, num_classes) 40 | self.bbox_pred = nn.Linear(representation_size, num_classes * 4) 41 | 42 | nn.init.normal_(self.cls_score.weight, std=0.01) 43 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 44 | for l in [self.cls_score, self.bbox_pred]: 45 | nn.init.constant_(l.bias, 0) 46 | 47 | def forward(self, x): 48 | scores = self.cls_score(x) 49 | bbox_deltas = self.bbox_pred(x) 50 | 51 | return scores, bbox_deltas 52 | 53 | 54 | _ROI_BOX_PREDICTOR = { 55 | "FastRCNNPredictor": FastRCNNPredictor, 56 | "FPNPredictor": FPNPredictor, 57 | } 58 | 59 | 60 | def 
make_roi_box_predictor(cfg): 61 | func = _ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 62 | return func(cfg) 63 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | 7 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 8 | from .roi_mask_predictors import make_roi_mask_predictor 9 | from .inference import make_roi_mask_post_processor 10 | from .loss import make_roi_mask_loss_evaluator 11 | 12 | 13 | def keep_only_positive_boxes(boxes): 14 | """ 15 | Given a set of BoxList containing the `labels` field, 16 | return a set of BoxList for which `labels > 0`. 17 | 18 | Arguments: 19 | boxes (list of BoxList) 20 | """ 21 | assert isinstance(boxes, (list, tuple)) 22 | assert isinstance(boxes[0], BoxList) 23 | assert boxes[0].has_field("labels") 24 | positive_boxes = [] 25 | positive_inds = [] 26 | num_boxes = 0 27 | for boxes_per_image in boxes: 28 | labels = boxes_per_image.get_field("labels") 29 | inds_mask = labels > 0 30 | inds = inds_mask.nonzero().squeeze(1) 31 | positive_boxes.append(boxes_per_image[inds]) 32 | positive_inds.append(inds_mask) 33 | return positive_boxes, positive_inds 34 | 35 | 36 | class ROIMaskHead(torch.nn.Module): 37 | def __init__(self, cfg): 38 | super(ROIMaskHead, self).__init__() 39 | self.cfg = cfg.clone() 40 | self.feature_extractor = make_roi_mask_feature_extractor(cfg) 41 | self.predictor = make_roi_mask_predictor(cfg) 42 | self.post_processor = make_roi_mask_post_processor(cfg) 43 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 44 | 45 | def forward(self, features, proposals, targets=None): 46 | """ 47 | Arguments: 48 | features (list[Tensor]): feature-maps from possibly several levels 49 | proposals (list[BoxList]): proposal boxes 50 | targets (list[BoxList], optional): the ground-truth targets. 51 | 52 | Returns: 53 | x (Tensor): the result of the feature extractor 54 | proposals (list[BoxList]): during training, the original proposals 55 | are returned. During testing, the predicted boxlists are returned 56 | with the `mask` field set 57 | losses (dict[Tensor]): During training, returns the losses for the 58 | head. During testing, returns an empty dict. 
59 | """ 60 | 61 | if self.training: 62 | # during training, only focus on positive boxes 63 | all_proposals = proposals 64 | proposals, positive_inds = keep_only_positive_boxes(proposals) 65 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 66 | x = features 67 | x = x[torch.cat(positive_inds, dim=0)] 68 | else: 69 | x = self.feature_extractor(features, proposals) 70 | mask_logits = self.predictor(x) 71 | 72 | if not self.training: 73 | result = self.post_processor(mask_logits, proposals) 74 | return x, result, {} 75 | 76 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 77 | 78 | return x, all_proposals, dict(loss_mask=loss_mask) 79 | 80 | 81 | def build_roi_mask_head(cfg): 82 | return ROIMaskHead(cfg) 83 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 6 | from maskrcnn_benchmark.modeling.poolers import Pooler 7 | from maskrcnn_benchmark.layers import Conv2d 8 | from maskrcnn_benchmark.modeling.make_layers import make_conv3x3 9 | 10 | 11 | class MaskRCNNFPNFeatureExtractor(nn.Module): 12 | """ 13 | Heads for FPN for classification 14 | """ 15 | 16 | def __init__(self, cfg): 17 | """ 18 | Arguments: 19 | num_classes (int): number of output classes 20 | input_size (int): number of channels of the input once it's flattened 21 | representation_size (int): size of the intermediate representation 22 | """ 23 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 24 | 25 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 26 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 27 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 28 | pooler = Pooler( 29 | output_size=(resolution, resolution), 30 | scales=scales, 31 | sampling_ratio=sampling_ratio, 32 | ) 33 | input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS 34 | self.pooler = pooler 35 | 36 | use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN 37 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 38 | dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION 39 | 40 | next_feature = input_size 41 | self.blocks = [] 42 | for layer_idx, layer_features in enumerate(layers, 1): 43 | layer_name = "mask_fcn{}".format(layer_idx) 44 | module = make_conv3x3(next_feature, layer_features, 45 | dilation=dilation, stride=1, use_gn=use_gn 46 | ) 47 | self.add_module(layer_name, module) 48 | next_feature = layer_features 49 | self.blocks.append(layer_name) 50 | 51 | def forward(self, x, proposals): 52 | x = self.pooler(x, proposals) 53 | 54 | for layer_name in self.blocks: 55 | x = F.relu(getattr(self, layer_name)(x)) 56 | 57 | return x 58 | 59 | 60 | _ROI_MASK_FEATURE_EXTRACTORS = { 61 | "ResNet50Conv5ROIFeatureExtractor": ResNet50Conv5ROIFeatureExtractor, 62 | "MaskRCNNFPNFeatureExtractor": MaskRCNNFPNFeatureExtractor, 63 | } 64 | 65 | 66 | def make_roi_mask_feature_extractor(cfg): 67 | func = _ROI_MASK_FEATURE_EXTRACTORS[cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR] 68 | return func(cfg) 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | from maskrcnn_benchmark.layers import ConvTranspose2d 7 | 8 | 9 | class MaskRCNNC4Predictor(nn.Module): 10 | def __init__(self, cfg): 11 | super(MaskRCNNC4Predictor, self).__init__() 12 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 13 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 14 | 15 | if cfg.MODEL.ROI_HEADS.USE_FPN: 16 | num_inputs = dim_reduced 17 | else: 18 | stage_index = 4 19 | stage2_relative_factor = 2 ** (stage_index - 1) 20 | res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 21 | num_inputs = res2_out_channels * stage2_relative_factor 22 | 23 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 24 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 25 | 26 | for name, param in self.named_parameters(): 27 | if "bias" in name: 28 | nn.init.constant_(param, 0) 29 | elif "weight" in name: 30 | # Caffe2 implementation uses MSRAFill, which in fact 31 | # corresponds to kaiming_normal_ in PyTorch 32 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 33 | 34 | def forward(self, x): 35 | x = F.relu(self.conv5_mask(x)) 36 | return self.mask_fcn_logits(x) 37 | 38 | 39 | _ROI_MASK_PREDICTOR = {"MaskRCNNC4Predictor": MaskRCNNC4Predictor} 40 | 41 | 42 | def make_roi_mask_predictor(cfg): 43 | func = _ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 44 | return func(cfg) 45 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rbox_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/roi_heads/rbox_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rbox_head/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 6 | from .roi_box_predictors import make_roi_box_predictor 7 | from .inference import make_roi_box_post_processor 8 | from .loss import make_roi_box_loss_evaluator 9 | 10 | 11 | class ROIBoxHead(torch.nn.Module): 12 | """ 13 | Generic Box Head class. 14 | """ 15 | 16 | def __init__(self, cfg): 17 | super(ROIBoxHead, self).__init__() 18 | self.feature_extractor = make_roi_box_feature_extractor(cfg) 19 | self.predictor = make_roi_box_predictor(cfg) 20 | self.post_processor = make_roi_box_post_processor(cfg) 21 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 22 | 23 | self.cfg = cfg 24 | 25 | def forward(self, features, proposals, targets=None): 26 | """ 27 | Arguments: 28 | features (list[Tensor]): feature-maps from possibly several levels 29 | proposals (list[BoxList]): proposal boxes 30 | targets (list[BoxList], optional): the ground-truth targets. 31 | 32 | Returns: 33 | x (Tensor): the result of the feature extractor 34 | proposals (list[BoxList]): during training, the subsampled proposals 35 | are returned. 
During testing, the predicted boxlists are returned 36 | losses (dict[Tensor]): During training, returns the losses for the 37 | head. During testing, returns an empty dict. 38 | """ 39 | 40 | # if self.cfg.TEST.CASCADE: 41 | recur_iter = self.cfg.MODEL.ROI_HEADS.RECUR_ITER if self.cfg.TEST.CASCADE else 1 42 | 43 | recur_proposals = proposals 44 | x = None 45 | for i in range(recur_iter): 46 | 47 | if self.training: 48 | # Faster R-CNN subsamples during training the proposals with a fixed 49 | # positive / negative ratio 50 | with torch.no_grad(): 51 | recur_proposals = self.loss_evaluator.subsample(recur_proposals, targets) 52 | 53 | # extract features that will be fed to the final classifier. The 54 | # feature_extractor generally corresponds to the pooler + heads 55 | x = self.feature_extractor(features, recur_proposals) 56 | # final classifier that converts the features into predictions 57 | class_logits, box_regression = self.predictor(x) 58 | 59 | if not self.training: 60 | recur_proposals = self.post_processor((class_logits, box_regression), recur_proposals, recur_iter - i - 1) # result 61 | else: 62 | loss_classifier, loss_box_reg = self.loss_evaluator( 63 | [class_logits], [box_regression] 64 | ) 65 | if not self.training: 66 | return x, recur_proposals, {} 67 | 68 | return ( 69 | x, 70 | recur_proposals, # the subsampled proposals carrying the "labels" field used by later heads 71 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 72 | ) 73 | 74 | 75 | def build_roi_box_head(cfg): 76 | """ 77 | Constructs a new box head. 78 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 79 | and make it a parameter in the config 80 | """ 81 | return ROIBoxHead(cfg) 82 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rbox_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
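# A shape sketch for the rotated-box predictors defined below; reading the
# 5 regression values per class as (dx, dy, dw, dh, dangle) is an assumption
# consistent with the 5-parameter encoding in rbox_coder.py, not something
# stated in this file.
#
#     # x: pooled features, e.g. (R, 1024, 7, 7) for the C4 head
#     # cls_logit:  (R, num_classes)
#     # bbox_pred:  (R, num_classes * 5); bbox_pred[:, 5*k : 5*k + 5] are
#     #             the regression deltas for class k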
2 | from torch import nn 3 | import torch 4 | 5 | class FastRCNNPredictor(nn.Module): 6 | def __init__(self, config, pretrained=None): 7 | super(FastRCNNPredictor, self).__init__() 8 | 9 | stage_index = 4 10 | stage2_relative_factor = 2 ** (stage_index - 1) 11 | res2_out_channels = config.MODEL.RESNETS.RES2_OUT_CHANNELS 12 | num_inputs = res2_out_channels * stage2_relative_factor 13 | 14 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7) 16 | self.cls_score = nn.Linear(num_inputs, num_classes) 17 | self.bbox_pred = nn.Linear(num_inputs, num_classes * 5) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | 29 | cls_logit = self.cls_score(x) 30 | bbox_pred = self.bbox_pred(x) 31 | 32 | return cls_logit, bbox_pred 33 | 34 | 35 | class FPNPredictor(nn.Module): 36 | def __init__(self, cfg): 37 | super(FPNPredictor, self).__init__() 38 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 39 | representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM 40 | 41 | self.cls_score = nn.Linear(representation_size, num_classes) 42 | self.bbox_pred = nn.Linear(representation_size, num_classes * 5) 43 | 44 | nn.init.normal_(self.cls_score.weight, std=0.01) 45 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 46 | for l in [self.cls_score, self.bbox_pred]: 47 | nn.init.constant_(l.bias, 0) 48 | 49 | def forward(self, x): 50 | scores = self.cls_score(x) 51 | bbox_deltas = self.bbox_pred(x) 52 | 53 | return scores, bbox_deltas 54 | 55 | 56 | _ROI_BOX_PREDICTOR = { 57 | "FastRCNNPredictor": FastRCNNPredictor, 58 | "FPNPredictor": FPNPredictor, 59 | } 60 | 61 | 62 | def make_roi_box_predictor(cfg): 63 | func = _ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 64 | return func(cfg) 65 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rec_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/roi_heads/rec_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rec_head/rec_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList, RBoxList 6 | 7 | from .roi_rec_feature_extractors import make_roi_rec_feature_extractor 8 | from .roi_rec_predictors import make_roi_rec_predictor 9 | from .inference import make_roi_rec_post_processor 10 | from .loss import make_roi_rec_loss_evaluator 11 | 12 | 13 | def keep_only_positive_boxes(boxes): 14 | """ 15 | Given a set of BoxList containing the `labels` field, 16 | return a set of BoxList for which `labels > 0`. 
17 | 18 | Arguments: 19 | boxes (list of BoxList) 20 | """ 21 | assert isinstance(boxes, (list, tuple)) 22 | assert isinstance(boxes[0], RBoxList) 23 | assert boxes[0].has_field("labels") 24 | positive_boxes = [] 25 | positive_inds = [] 26 | num_boxes = 0 27 | for boxes_per_image in boxes: 28 | labels = boxes_per_image.get_field("labels") 29 | inds_mask = labels > 0 30 | inds = inds_mask.nonzero().squeeze(1) 31 | positive_boxes.append(boxes_per_image[inds]) 32 | positive_inds.append(inds_mask) 33 | return positive_boxes, positive_inds 34 | 35 | 36 | class ROIRecHead(torch.nn.Module): 37 | def __init__(self, cfg): 38 | super(ROIRecHead, self).__init__() 39 | self.cfg = cfg.clone() 40 | self.feature_extractor = make_roi_rec_feature_extractor(cfg) 41 | self.predictor = make_roi_rec_predictor(cfg) 42 | self.post_processor = make_roi_rec_post_processor(cfg) 43 | self.loss_evaluator = make_roi_rec_loss_evaluator(cfg) 44 | 45 | def forward(self, features, proposals, targets=None): 46 | """ 47 | Arguments: 48 | features (list[Tensor]): feature-maps from possibly several levels 49 | proposals (list[BoxList]): proposal boxes 50 | targets (list[BoxList], optional): the ground-truth targets. 51 | 52 | Returns: 53 | x (Tensor): the result of the feature extractor 54 | proposals (list[BoxList]): during training, the original proposals 55 | are returned. During testing, the predicted boxlists are returned 56 | with the `mask` field set 57 | losses (dict[Tensor]): During training, returns the losses for the 58 | head. During testing, returns an empty dict. 59 | """ 60 | 61 | if self.training: 62 | # during training, only focus on positive boxes 63 | all_proposals = proposals 64 | proposals, positive_inds = keep_only_positive_boxes(proposals) 65 | if self.training and self.cfg.MODEL.ROI_REC_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 66 | x = features 67 | x = x[torch.cat(positive_inds, dim=0)] 68 | else: 69 | x = self.feature_extractor(features, proposals) 70 | rec_logits = self.predictor(x) 71 | 72 | if not self.training: 73 | result = self.post_processor(rec_logits, proposals) 74 | return x, result, {} 75 | 76 | loss_rec = self.loss_evaluator(proposals, rec_logits, targets) 77 | 78 | return x, all_proposals, dict(loss_rec=loss_rec) 79 | 80 | 81 | def build_roi_rec_head(cfg): 82 | return ROIRecHead(cfg) 83 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rmask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/roi_heads/rmask_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rmask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import RBoxList 6 | 7 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 8 | from .roi_mask_predictors import make_roi_mask_predictor 9 | from .inference import make_roi_mask_post_processor 10 | from .loss import make_roi_mask_loss_evaluator 11 | 12 | 13 | def keep_only_positive_boxes(boxes): 14 | """ 15 | Given a set of BoxList containing the `labels` field, 16 | return a set of BoxList for which `labels > 0`. 
17 | 18 | Arguments: 19 | boxes (list of BoxList) 20 | """ 21 | assert isinstance(boxes, (list, tuple)) 22 | assert isinstance(boxes[0], RBoxList) 23 | assert boxes[0].has_field("labels") 24 | positive_boxes = [] 25 | positive_inds = [] 26 | num_boxes = 0 27 | for boxes_per_image in boxes: 28 | labels = boxes_per_image.get_field("labels") 29 | inds_mask = labels > 0 30 | inds = inds_mask.nonzero().squeeze(1) 31 | positive_boxes.append(boxes_per_image[inds]) 32 | positive_inds.append(inds_mask) 33 | return positive_boxes, positive_inds 34 | 35 | 36 | class ROIMaskHead(torch.nn.Module): 37 | def __init__(self, cfg): 38 | super(ROIMaskHead, self).__init__() 39 | self.cfg = cfg.clone() 40 | self.feature_extractor = make_roi_mask_feature_extractor(cfg) 41 | self.predictor = make_roi_mask_predictor(cfg) 42 | self.post_processor = make_roi_mask_post_processor(cfg) 43 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 44 | 45 | def forward(self, features, proposals, targets=None): 46 | """ 47 | Arguments: 48 | features (list[Tensor]): feature-maps from possibly several levels 49 | proposals (list[BoxList]): proposal boxes 50 | targets (list[BoxList], optional): the ground-truth targets. 51 | 52 | Returns: 53 | x (Tensor): the result of the feature extractor 54 | proposals (list[BoxList]): during training, the original proposals 55 | are returned. During testing, the predicted boxlists are returned 56 | with the `mask` field set 57 | losses (dict[Tensor]): During training, returns the losses for the 58 | head. During testing, returns an empty dict. 59 | """ 60 | 61 | if self.training: 62 | # during training, only focus on positive boxes 63 | all_proposals = proposals 64 | proposals, positive_inds = keep_only_positive_boxes(proposals) 65 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 66 | x = features 67 | x = x[torch.cat(positive_inds, dim=0)] 68 | else: 69 | x = self.feature_extractor(features, proposals) 70 | mask_logits = self.predictor(x) 71 | 72 | if not self.training: 73 | result = self.post_processor(mask_logits, proposals) 74 | return x, result, {} 75 | 76 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 77 | 78 | return x, all_proposals, dict(loss_mask=loss_mask) 79 | 80 | 81 | def build_roi_mask_head(cfg): 82 | return ROIMaskHead(cfg) 83 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rmask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
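# A fully determined sketch of keep_only_positive_boxes (defined in
# mask_head.py above); the RBoxList itself is elided since its exact
# constructor lives in structures/bounding_box.py.
#
#     # boxes[0].get_field("labels") == tensor([0, 2, 0, 1])
#     # positive_boxes[0] -> the boxes at indices 1 and 3 (labels > 0)
#     # positive_inds[0]  -> tensor([0, 1, 0, 1], dtype=torch.uint8)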
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 6 | from maskrcnn_benchmark.modeling.poolers import Pooler, PyramidRROIAlign 7 | from maskrcnn_benchmark.layers import Conv2d 8 | from maskrcnn_benchmark.modeling.make_layers import make_conv3x3 9 | 10 | 11 | class MaskRCNNFPNFeatureExtractor(nn.Module): 12 | """ 13 | Heads for FPN for classification 14 | """ 15 | 16 | def __init__(self, cfg): 17 | """ 18 | Arguments: 19 | num_classes (int): number of output classes 20 | input_size (int): number of channels of the input once it's flattened 21 | representation_size (int): size of the intermediate representation 22 | """ 23 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 24 | 25 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 26 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 27 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 28 | pooler = PyramidRROIAlign( 29 | output_size=(resolution, resolution), 30 | scales=scales, 31 | ) 32 | input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS 33 | self.pooler = pooler 34 | 35 | use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN 36 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 37 | dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION 38 | 39 | self.word_margin = cfg.MODEL.ROI_REC_HEAD.BOXES_MARGIN 40 | self.det_margin = cfg.MODEL.RRPN.GT_BOX_MARGIN 41 | 42 | self.rescale = self.word_margin / self.det_margin 43 | 44 | next_feature = input_size 45 | self.blocks = [] 46 | for layer_idx, layer_features in enumerate(layers, 1): 47 | layer_name = "mask_fcn{}".format(layer_idx) 48 | module = make_conv3x3(next_feature, layer_features, 49 | dilation=dilation, stride=1, use_gn=use_gn 50 | ) 51 | self.add_module(layer_name, module) 52 | next_feature = layer_features 53 | self.blocks.append(layer_name) 54 | 55 | def forward(self, x, proposals): 56 | x = self.pooler(x, proposals) 57 | 58 | # resize_proposals = [proposal.rescale(self.rescale) for proposal in proposals] 59 | # x = self.pooler(x, resize_proposals) 60 | 61 | for layer_name in self.blocks: 62 | x = F.relu(getattr(self, layer_name)(x)) 63 | 64 | return x 65 | 66 | 67 | _ROI_MASK_FEATURE_EXTRACTORS = { 68 | "ResNet50Conv5ROIFeatureExtractor": ResNet50Conv5ROIFeatureExtractor, 69 | "MaskRCNNFPNFeatureExtractor": MaskRCNNFPNFeatureExtractor, 70 | } 71 | 72 | 73 | def make_roi_mask_feature_extractor(cfg): 74 | func = _ROI_MASK_FEATURE_EXTRACTORS[cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR] 75 | return func(cfg) 76 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rmask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
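# A worked example of the margin rescaling computed in the feature extractor
# above; both config values are hypothetical, not defaults read from this
# repository.
#
#     # MODEL.ROI_REC_HEAD.BOXES_MARGIN = 1.4  (word_margin)
#     # MODEL.RRPN.GT_BOX_MARGIN        = 1.2  (det_margin)
#     # rescale = 1.4 / 1.2 ~= 1.167, i.e. proposals would be enlarged by
#     # roughly 17% before pooling if the commented-out rescale path in
#     # forward() were enabled.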
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | from maskrcnn_benchmark.layers import ConvTranspose2d 7 | 8 | 9 | class MaskRCNNC4Predictor(nn.Module): 10 | def __init__(self, cfg): 11 | super(MaskRCNNC4Predictor, self).__init__() 12 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 13 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 14 | 15 | if cfg.MODEL.ROI_HEADS.USE_FPN: 16 | num_inputs = dim_reduced 17 | else: 18 | stage_index = 4 19 | stage2_relative_factor = 2 ** (stage_index - 1) 20 | res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 21 | num_inputs = res2_out_channels * stage2_relative_factor 22 | 23 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 24 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 25 | 26 | for name, param in self.named_parameters(): 27 | if "bias" in name: 28 | nn.init.constant_(param, 0) 29 | elif "weight" in name: 30 | # Caffe2 implementation uses MSRAFill, which in fact 31 | # corresponds to kaiming_normal_ in PyTorch 32 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 33 | 34 | def forward(self, x): 35 | x = F.relu(self.conv5_mask(x)) 36 | return self.mask_fcn_logits(x) 37 | 38 | 39 | _ROI_MASK_PREDICTOR = {"MaskRCNNC4Predictor": MaskRCNNC4Predictor} 40 | 41 | 42 | def make_roi_mask_predictor(cfg): 43 | func = _ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 44 | return func(cfg) 45 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/roi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .box_head.box_head import build_roi_box_head 5 | from .mask_head.mask_head import build_roi_mask_head 6 | 7 | 8 | class CombinedROIHeads(torch.nn.ModuleDict): 9 | """ 10 | Combines a set of individual heads (for box prediction or masks) into a single 11 | head. 
12 | """ 13 | 14 | def __init__(self, cfg, heads): 15 | super(CombinedROIHeads, self).__init__(heads) 16 | self.cfg = cfg.clone() 17 | if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 18 | self.mask.feature_extractor = self.box.feature_extractor 19 | 20 | def forward(self, features, proposals, targets=None): 21 | losses = {} 22 | # TODO rename x to roi_box_features, if it doesn't increase memory consumption 23 | x, detections, loss_box = self.box(features, proposals, targets) 24 | losses.update(loss_box) 25 | if self.cfg.MODEL.MASK_ON: 26 | mask_features = features 27 | # optimization: during training, if we share the feature extractor between 28 | # the box and the mask heads, then we can reuse the features already computed 29 | if ( 30 | self.training 31 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 32 | ): 33 | mask_features = x 34 | # During training, self.box() will return the unaltered proposals as "detections" 35 | # this makes the API consistent during training and testing 36 | x, detections, loss_mask = self.mask(mask_features, detections, targets) 37 | losses.update(loss_mask) 38 | return x, detections, losses 39 | 40 | 41 | def build_roi_heads(cfg): 42 | # individually create the heads, that will be combined together 43 | # afterwards 44 | roi_heads = [] 45 | if not cfg.MODEL.RPN_ONLY: 46 | roi_heads.append(("box", build_roi_box_head(cfg))) 47 | if cfg.MODEL.MASK_ON: 48 | roi_heads.append(("mask", build_roi_mask_head(cfg))) 49 | 50 | # combine individual heads in a single module 51 | if roi_heads: 52 | roi_heads = CombinedROIHeads(cfg, roi_heads) 53 | 54 | return roi_heads 55 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rroi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .rbox_head.box_head import build_roi_box_head 5 | from .rec_head.rec_head import build_roi_rec_head 6 | from .rmask_head.mask_head import build_roi_mask_head 7 | 8 | class CombinedROIHeads(torch.nn.ModuleDict): 9 | """ 10 | Combines a set of individual heads (for box prediction or masks) into a single 11 | head. 
12 | """ 13 | 14 | def __init__(self, cfg, heads): 15 | super(CombinedROIHeads, self).__init__(heads) 16 | self.cfg = cfg.clone() 17 | if cfg.MODEL.REC_ON and cfg.MODEL.ROI_REC_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 18 | self.rec.feature_extractor = self.box.feature_extractor 19 | 20 | def forward(self, features, proposals, targets=None): 21 | losses = {} 22 | # TODO rename x to roi_box_features, if it doesn't increase memory consumption 23 | if self.cfg.MODEL.FP4P_ON: 24 | # get you C4 25 | x, detections, loss_box = self.box((features[-1], ), proposals, targets) 26 | else: 27 | x, detections, loss_box = self.box(features, proposals, targets) 28 | losses.update(loss_box) 29 | 30 | if self.cfg.MODEL.MASK_ON: 31 | mask_features = features 32 | # optimization: during training, if we share the feature extractor between 33 | # the box and the mask heads, then we can reuse the features already computed 34 | if ( 35 | self.training 36 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 37 | ): 38 | mask_features = x 39 | # During training, self.box() will return the unaltered proposals as "detections" 40 | # this makes the API consistent during training and testing 41 | # detach process 42 | mask_features_detach = [feature.detach() for feature in mask_features] 43 | x, detections, loss_mask = self.mask(mask_features_detach, detections, targets) 44 | losses.update(loss_mask) 45 | 46 | if self.cfg.MODEL.REC_ON: 47 | rec_features = features 48 | # optimization: during training, if we share the feature extractor between 49 | # the box and the mask heads, then we can reuse the features already computed 50 | if ( 51 | self.training 52 | and self.cfg.MODEL.ROI_REC_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 53 | ): 54 | rec_features = x 55 | # During training, self.box() will return the unaltered proposals as "detections" 56 | # this makes the API consistent during training and testing 57 | x, detections, loss_rec = self.rec(rec_features, detections, targets) 58 | losses.update(loss_rec) 59 | return x, detections, losses 60 | 61 | 62 | def build_roi_heads(cfg): 63 | # individually create the heads, that will be combined together 64 | # afterwards 65 | roi_heads = [] 66 | if not cfg.MODEL.RPN_ONLY: 67 | roi_heads.append(("box", build_roi_box_head(cfg))) 68 | if cfg.MODEL.REC_ON: 69 | roi_heads.append(("rec", build_roi_rec_head(cfg))) 70 | if cfg.MODEL.MASK_ON: 71 | roi_heads.append(("mask", build_roi_mask_head(cfg))) 72 | 73 | # combine individual heads in a single module 74 | if roi_heads: 75 | roi_heads = CombinedROIHeads(cfg, roi_heads) 76 | 77 | return roi_heads 78 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rrpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_optimizer 3 | from .build import make_lr_scheduler 4 | from .lr_scheduler import WarmupMultiStepLR 5 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .lr_scheduler import WarmupMultiStepLR 5 | 6 | 7 | def make_optimizer(cfg, model): 8 | params = [] 9 | for key, value in model.named_parameters(): 10 | if not value.requires_grad: 11 | continue 12 | lr = cfg.SOLVER.BASE_LR 13 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 14 | if "bias" in key: 15 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 16 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 17 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 18 | 19 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM) 20 | return optimizer 21 | 22 | 23 | def make_lr_scheduler(cfg, optimizer): 24 | return WarmupMultiStepLR( 25 | optimizer, 26 | cfg.SOLVER.STEPS, 27 | cfg.SOLVER.GAMMA, 28 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 29 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 30 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 31 | ) 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | 4 | import torch 5 | 6 | 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 8 | # separating MultiStepLR with WarmupLR 9 | # but the current LRScheduler design doesn't allow it 10 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer, 14 | milestones, 15 | gamma=0.1, 16 | warmup_factor=1.0 / 3, 17 | warmup_iters=500, 18 | warmup_method="linear", 19 | last_epoch=-1, 20 | ): 21 | if not list(milestones) == sorted(milestones): 22 | raise ValueError( 23 | "Milestones should be a list of" " increasing integers. 
Got {}", 24 | milestones, 25 | ) 26 | 27 | if warmup_method not in ("constant", "linear"): 28 | raise ValueError( 29 | "Only 'constant' or 'linear' warmup_method accepted" 30 | "got {}".format(warmup_method) 31 | ) 32 | self.milestones = milestones 33 | self.gamma = gamma 34 | self.warmup_factor = warmup_factor 35 | self.warmup_iters = warmup_iters 36 | self.warmup_method = warmup_method 37 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 38 | 39 | def get_lr(self): 40 | warmup_factor = 1 41 | if self.last_epoch < self.warmup_iters: 42 | if self.warmup_method == "constant": 43 | warmup_factor = self.warmup_factor 44 | elif self.warmup_method == "linear": 45 | alpha = float(self.last_epoch) / self.warmup_iters 46 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 47 | return [ 48 | base_lr 49 | * warmup_factor 50 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 51 | for base_lr in self.base_lrs 52 | ] 53 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/structures/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 
33 | When tensors is an iterable of Tensors, it pads 34 | the Tensors with zeros so that they have the same 35 | shape 36 | """ 37 | 38 | # print('tensors:', tensors.size(), 'isinstance(tensors, torch.Tensor):', isinstance(tensors, torch.Tensor)) 39 | 40 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 41 | tensors = [tensors] 42 | 43 | if isinstance(tensors, ImageList): 44 | return tensors 45 | elif isinstance(tensors, torch.Tensor): 46 | # single tensor shape can be inferred 47 | assert tensors.dim() == 4 48 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 49 | return ImageList(tensors, image_sizes) 50 | elif isinstance(tensors, (tuple, list)): 51 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 52 | 53 | # TODO Ideally, just remove this and let the model handle arbitrary 54 | # input sizes 55 | if size_divisible > 0: 56 | import math 57 | 58 | stride = size_divisible 59 | max_size = list(max_size) 60 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 61 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 62 | max_size = tuple(max_size) 63 | 64 | batch_shape = (len(tensors),) + max_size 65 | batched_imgs = tensors[0].new(*batch_shape).zero_() 66 | for img, pad_img in zip(tensors, batched_imgs): 67 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 68 | 69 | image_sizes = [im.shape[-2:] for im in tensors] 70 | 71 | return ImageList(batched_imgs, image_sizes) 72 | else: 73 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 74 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/utils/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
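A worked sketch of the `size_divisible` padding in `to_image_list` above: the padded batch shape is the per-dimension maximum, rounded up to a multiple of the stride. The stride value 32 is an assumed, typical FPN divisibility value, not read from any config.

```python
import math
import torch

tensors = [torch.randn(3, 37, 50), torch.randn(3, 41, 45)]
stride = 32  # assumed SIZE_DIVISIBILITY-style value

max_size = [max(s) for s in zip(*[img.shape for img in tensors])]  # [3, 41, 50]
max_size[1] = int(math.ceil(max_size[1] / stride) * stride)        # 41 -> 64
max_size[2] = int(math.ceil(max_size[2] / stride) * stride)        # 50 -> 64

batch_shape = (len(tensors),) + tuple(max_size)
batched_imgs = tensors[0].new(*batch_shape).zero_()                # zero padding
for img, pad_img in zip(tensors, batched_imgs):
    pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)

print(batch_shape)                        # (2, 3, 64, 64)
print([im.shape[-2:] for im in tensors])  # original sizes travel with the batch
```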
2 | import PIL 3 | 4 | from torch.utils.collect_env import get_pretty_env_info 5 | 6 | 7 | def get_pil_version(): 8 | return "\n Pillow ({})".format(PIL.__version__) 9 | 10 | 11 | def collect_env_info(): 12 | env_str = get_pretty_env_info() 13 | env_str += get_pil_version() 14 | return env_str 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for cv2 utility functions and maintaining version compatibility 3 | between 3.x and 4.x 4 | """ 5 | import cv2 6 | 7 | 8 | def findContours(*args, **kwargs): 9 | """ 10 | Wraps cv2.findContours to maintain compatibility between versions 11 | 3 and 4 12 | 13 | Returns: 14 | contours, hierarchy 15 | """ 16 | if cv2.__version__.startswith('4'): 17 | contours, hierarchy = cv2.findContours(*args, **kwargs) 18 | elif cv2.__version__.startswith('3'): 19 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 20 | else: 21 | raise AssertionError( 22 | 'cv2 must be either version 3 or 4 to call this method') 23 | 24 | return contours, hierarchy 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | 4 | from maskrcnn_benchmark.utils.imports import import_file 5 | 6 | 7 | def setup_environment(): 8 | """Perform environment setup work. The default setup is a no-op, but this 9 | function allows the user to specify a Python source file that performs 10 | custom setup work that may be necessary to their computing environment. 11 | """ 12 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 13 | if custom_module_path: 14 | setup_custom_environment(custom_module_path) 15 | else: 16 | # The default setup is a no-op 17 | pass 18 | 19 | 20 | def setup_custom_environment(custom_module_path): 21 | """Load custom environment setup from a Python source file and run the setup 22 | function. 23 | """ 24 | module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) 25 | assert hasattr(module, "setup_environment") and callable( 26 | module.setup_environment 27 | ), ( 28 | "Custom environment module defined in {} does not have the " 29 | "required callable attribute 'setup_environment'." 30 | ).format( 31 | custom_module_path 32 | ) 33 | module.setup_environment() 34 | 35 | 36 | # Force environment setup when this module is imported 37 | setup_environment() 38 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/imports.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
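A usage sketch for the `findContours` wrapper above, assuming OpenCV 3.x or 4.x is installed: the same call works under both versions because the wrapper hides the differing return arity.

```python
import cv2
import numpy as np
from maskrcnn_benchmark.utils import cv2_util

mask = np.zeros((32, 32), dtype=np.uint8)
mask[8:24, 8:24] = 1  # one square blob

contours, hierarchy = cv2_util.findContours(
    mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
)
print(len(contours))  # 1
```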
2 | import torch 3 | 4 | if torch._six.PY3: 5 | import importlib 6 | import importlib.util 7 | import sys 8 | 9 | 10 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 11 | def import_file(module_name, file_path, make_importable=False): 12 | spec = importlib.util.spec_from_file_location(module_name, file_path) 13 | module = importlib.util.module_from_spec(spec) 14 | spec.loader.exec_module(module) 15 | if make_importable: 16 | sys.modules[module_name] = module 17 | return module 18 | else: 19 | import imp 20 | 21 | def import_file(module_name, file_path, make_importable=None): 22 | module = imp.load_source(module_name, file_path) 23 | return module 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | 7 | def setup_logger(name, save_dir, distributed_rank): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | ch = logging.StreamHandler(stream=sys.stdout) 14 | ch.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 16 | ch.setFormatter(formatter) 17 | logger.addHandler(ch) 18 | 19 | if save_dir: 20 | fh = logging.FileHandler(os.path.join(save_dir, "log.txt")) 21 | fh.setLevel(logging.DEBUG) 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | 25 | return logger 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | 7 | 8 | class SmoothedValue(object): 9 | """Track a series of values and provide access to smoothed values over a 10 | window or the global series average. 
11 | """ 12 | 13 | def __init__(self, window_size=20): 14 | self.deque = deque(maxlen=window_size) 15 | self.series = [] 16 | self.total = 0.0 17 | self.count = 0 18 | 19 | def update(self, value): 20 | self.deque.append(value) 21 | self.series.append(value) 22 | self.count += 1 23 | self.total += value 24 | 25 | @property 26 | def median(self): 27 | d = torch.tensor(list(self.deque)) 28 | return d.median().item() 29 | 30 | @property 31 | def avg(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.mean().item() 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger(object): 41 | def __init__(self, delimiter="\t"): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | if attr in self.__dict__: 56 | return self.__dict__[attr] 57 | raise AttributeError("'{}' object has no attribute '{}'".format( 58 | type(self).__name__, attr)) 59 | 60 | def __str__(self): 61 | loss_str = [] 62 | for name, meter in self.meters.items(): 63 | loss_str.append( 64 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 65 | ) 66 | return self.delimiter.join(loss_str) 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import os 4 | 5 | 6 | def mkdir(path): 7 | try: 8 | os.makedirs(path) 9 | except OSError as e: 10 | if e.errno != errno.EEXIST: 11 | raise 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | import sys 4 | 5 | from torch.utils.model_zoo import _download_url_to_file 6 | from torch.utils.model_zoo import urlparse 7 | from torch.utils.model_zoo import HASH_REGEX 8 | 9 | from maskrcnn_benchmark.utils.comm import is_main_process 10 | from maskrcnn_benchmark.utils.comm import synchronize 11 | 12 | 13 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py 14 | # but with a few improvements and modifications 15 | def cache_url(url, model_dir=None, progress=True): 16 | r"""Loads the Torch serialized object at the given URL. 17 | If the object is already present in `model_dir`, it's deserialized and 18 | returned. The filename part of the URL should follow the naming convention 19 | ``filename-.ext`` where ```` is the first eight or more 20 | digits of the SHA256 hash of the contents of the file. The hash is used to 21 | ensure unique names and to verify the contents of the file. 22 | The default value of `model_dir` is ``$TORCH_HOME/models`` where 23 | ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be 24 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 
25 | Args: 26 | url (string): URL of the object to download 27 | model_dir (string, optional): directory in which to save the object 28 | progress (bool, optional): whether or not to display a progress bar to stderr 29 | Example: 30 | >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') 31 | """ 32 | if model_dir is None: 33 | torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch')) 34 | model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models')) 35 | if not os.path.exists(model_dir): 36 | os.makedirs(model_dir) 37 | parts = urlparse(url) 38 | filename = os.path.basename(parts.path) 39 | if filename == "model_final.pkl": 40 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename 41 | # so make the full path the filename by replacing / with _ 42 | filename = parts.path.replace("/", "_") 43 | cached_file = os.path.join(model_dir, filename) 44 | if not os.path.exists(cached_file) and is_main_process(): 45 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 46 | hash_prefix = HASH_REGEX.search(filename) 47 | if hash_prefix is not None: 48 | hash_prefix = hash_prefix.group(1) 49 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, 50 | # which matches the hash PyTorch uses. So we skip the hash matching 51 | # if the hash_prefix is less than 6 characters 52 | if len(hash_prefix) < 6: 53 | hash_prefix = None 54 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 55 | synchronize() 56 | return cached_file 57 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/rec_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import collections 4 | 5 | 6 | class Coder: 7 | 8 | def __init__(self, alphabet_file): 9 | # All char in one line 10 | self.alphabet = open(alphabet_file, 'r').readlines()[0].replace('\n', '') 11 | self.dictionary = {} 12 | self.label_to_char = {} 13 | 14 | cnt = 1 15 | for ch in self.alphabet: 16 | self.dictionary[ch] = cnt 17 | self.label_to_char[cnt] = ch 18 | cnt += 1 19 | 20 | def encode(self, word_str): 21 | 22 | labels = [] 23 | for ch in word_str: 24 | if ch in self.alphabet: 25 | labels.append(self.dictionary[ch]) 26 | 27 | return labels 28 | 29 | def decode(self, labels): 30 | 31 | dec_str = '' 32 | for label in labels: 33 | if label in self.label_to_char: 34 | dec_str += self.label_to_char[label] 35 | 36 | return dec_str 37 | 38 | class StrLabelConverter(object): 39 | 40 | def __init__(self, alphabet): 41 | self.alphabet = alphabet + '-' # for `-1` index 42 | 43 | self.dict = {} 44 | for i, char in enumerate(alphabet): 45 | # NOTE: 0 is reserved for 'blank' required by wrap_ctc 46 | self.dict[char] = i + 1 47 | print('------------------- alphabet -------------------') 48 | print('alphabet:', self.alphabet) 49 | print('------------------------------------------------') 50 | def encode(self, text, depth=0): 51 | """Support batch or single str.""" 52 | if isinstance(text, str): 53 | for char in text: 54 | if self.alphabet.find(char) == -1: 55 | print(char) 56 | text = [self.dict[char] for char in text] 57 | length = [len(text)] 58 | elif isinstance(text, collections.Iterable): 59 | length = [len(s) for s in text] 60 | text = ''.join(text) 61 | text, _ = self.encode(text) 62 | 63 | if depth: 64 | return text, len(text) 65 | #return 
(torch.IntTensor(text), torch.IntTensor(length)) 66 | return (text, length) 67 | 68 | def decode(self, t, length, raw=False): 69 | if length.numel() == 1: 70 | length = length[0] 71 | t = t[:length] 72 | if raw: 73 | return ''.join([self.alphabet[i - 1] for i in t]) 74 | else: 75 | char_list = [] 76 | for i in range(length): 77 | if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])): 78 | char_list.append(self.alphabet[t[i] - 1]) 79 | return ''.join(char_list) 80 | else: 81 | texts = [] 82 | index = 0 83 | for i in range(length.numel()): 84 | l = length[i] 85 | texts.append(self.decode( 86 | t[index:index + l], torch.IntTensor([l]), raw=raw)) 87 | index += l 88 | return texts 89 | 90 | 91 | if __name__ == '__main__': 92 | 93 | alpha_f = 'alpha.txt' 94 | coder = Coder(alpha_f) 95 | 96 | words = ['shits', 'bull', 'fXxk'] 97 | 98 | for w in words: 99 | code = coder.encode(w) 100 | print('code:', code) 101 | word = coder.decode(code) 102 | print('word:', word) 103 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | def _register_generic(module_dict, module_name, module): 5 | assert module_name not in module_dict 6 | module_dict[module_name] = module 7 | 8 | 9 | class Registry(dict): 10 | ''' 11 | A helper class for managing registering modules, it extends a dictionary 12 | and provides a register functions. 13 | 14 | Eg. creeting a registry: 15 | some_registry = Registry({"default": default_module}) 16 | 17 | There're two ways of registering new modules: 18 | 1): normal way is just calling register function: 19 | def foo(): 20 | ... 21 | some_registry.register("foo_module", foo) 22 | 2): used as decorator when declaring the module: 23 | @some_registry.register("foo_module") 24 | @some_registry.register("foo_modeul_nickname") 25 | def foo(): 26 | ... 
27 | 28 | Accessing a module is just like using a dictionary, e.g.: 29 | f = some_registry["foo_module"] 30 | ''' 31 | def __init__(self, *args, **kwargs): 32 | super(Registry, self).__init__(*args, **kwargs) 33 | 34 | def register(self, module_name, module=None): 35 | # used as function call 36 | if module is not None: 37 | _register_generic(self, module_name, module) 38 | return 39 | 40 | # used as decorator 41 | def register_fn(fn): 42 | _register_generic(self, module_name, fn) 43 | return fn 44 | 45 | return register_fn 46 | -------------------------------------------------------------------------------- /rotation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/rotation/__init__.py -------------------------------------------------------------------------------- /rotation/rbbox_overlaps.hpp: -------------------------------------------------------------------------------- 1 | void _overlaps(float* overlaps,const float* boxes,const float* query_boxes, int n, int k, int device_id); 2 | 3 | -------------------------------------------------------------------------------- /rotation/rbbox_overlaps.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | cdef extern from "rbbox_overlaps.hpp": 5 | void _overlaps(np.float32_t*, np.float32_t*, np.float32_t*, int, int, int) 6 | 7 | def rbbx_overlaps (np.ndarray[np.float32_t, ndim=2] boxes, np.ndarray[np.float32_t, ndim=2] query_boxes, np.int32_t device_id=0): 8 | cdef int N = boxes.shape[0] 9 | cdef int K = query_boxes.shape[0] 10 | cdef np.ndarray[np.float32_t, ndim=2] overlaps = np.zeros((N, K), dtype = np.float32) 11 | _overlaps(&overlaps[0, 0], &boxes[0, 0], &query_boxes[0, 0], N, K, device_id) 12 | return overlaps 13 | 14 | 15 | -------------------------------------------------------------------------------- /rotation/rotate_circle_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | def a(np.float x,np.float r): 5 | return 0.5*3.1415926535*r*r-x*np.sqrt(r*r-x*x) - r*r*np.arcsin(x/r) 6 | 7 | 8 | def rotate_cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 9 | cdef np.ndarray[np.float32_t, ndim=1] x_ctrs = dets[:, 0] 10 | cdef np.ndarray[np.float32_t, ndim=1] y_ctrs = dets[:, 1] 11 | cdef np.ndarray[np.float32_t, ndim=1] heights = dets[:, 2] 12 | cdef np.ndarray[np.float32_t, ndim=1] widths = dets[:, 3] 13 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 5] 14 | 15 | cdef np.ndarray[np.float32_t, ndim=1] rs = np.sqrt(heights**2+widths**2)/2.0 16 | cdef np.ndarray[np.float32_t, ndim=1] areas = 3.1415926535*rs*rs 17 | 18 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 19 | cdef int ndets = dets.shape[0] 20 | cdef np.ndarray[np.int_t, ndim=1] suppressed = np.zeros((ndets), dtype=np.int) 21 | 22 | # nominal indices 23 | cdef int _i, _j 24 | # sorted indices 25 | cdef int i, j 26 | # temp variables for box i's (the box currently under consideration) 27 | cdef np.float32_t ix_ctr, iy_ctr, ir, iarea 28 | # variables for computing overlap with box j (lower scoring box) 29 | cdef np.float32_t xx_ctr, yy_ctr,rr 30 | cdef np.float32_t inter, ovr 31 | 32 | cdef np.float32_t d,x1,x2,s,r1,r2 33 | keep = [] 34 | for _i in range(ndets): 35 | i = order[_i] 36 | if suppressed[i] == 1: 37 | continue 38 |
keep.append(i) 39 | ix_ctr = x_ctrs[i] 40 | iy_ctr = y_ctrs[i] 41 | ir = rs[i] 42 | iarea = areas[i] 43 | 44 | for _j in range(_i+1,ndets): 45 | j = order[_j] 46 | if suppressed[j] == 1: 47 | continue 48 | xx_ctr = x_ctrs[j] 49 | yy_ctr = y_ctrs[j] 50 | rr = rs[j] 51 | 52 | d = np.sqrt((ix_ctr-xx_ctr)**2+(iy_ctr-yy_ctr)**2) 53 | 54 | if ir<=rr: 55 | r1 = ir 56 | r2 = rr 57 | else: 58 | r1 = rr 59 | r2 = ir 60 | 61 | if d > 0.0: 62 | x1 = (d*d+r1*r1-r2*r2)/(2*d) 63 | x2 = (d*d+r2*r2-r1*r1)/(2*d) 64 | s = (r2*r2-r1*r1-d*d)/(2*d) 65 | #else: Avoid Warning 66 | # x1 = 0 67 | # x2 = 0 68 | # s = 0 69 | 70 | if d<=r2-r1: 71 | inter = 3.1415926535*r1*r1 72 | elif d>=r2+r1 or r2 == 0 or r1 == 0: 73 | inter = 0.0 74 | else: 75 | if d*d < r2*r2-r1*r1: 76 | inter = 3.1415926535*r1*r1 - a(s,r1) + a(x2,r2) 77 | else: 78 | inter = a(x1,r1) + a(x2,r2) 79 | 80 | ovr = inter*1.0/(iarea+areas[j]-inter) 81 | 82 | 83 | if ovr>=thresh: 84 | suppressed[j]=1 85 | return keep 86 | -------------------------------------------------------------------------------- /rotation/rotate_cpu_nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import time 4 | import math 5 | def rotate_cpu_nms(dets, threshold): 6 | ''' 7 | Parameters 8 | ---------------- 9 | dets: (N, 6) --- x_ctr, y_ctr, height, width, angle, score 10 | threshold: 0.7 or 0.5 IoU 11 | ---------------- 12 | Returns 13 | ---------------- 14 | keep: keep the remaining index of dets 15 | ''' 16 | keep = [] 17 | scores = dets[:, -1] 18 | 19 | tic = time.time() 20 | 21 | order = scores.argsort()[::-1] 22 | ndets = dets.shape[0] 23 | print("nms start") 24 | print(ndets) 25 | suppressed = np.zeros((ndets), dtype = np.int) 26 | 27 | 28 | 29 | for _i in range(ndets): 30 | i = order[_i] 31 | if suppressed[i] == 1: 32 | continue 33 | keep.append(i) 34 | r1 = ((dets[i,0],dets[i,1]),(dets[i,3],dets[i,2]),dets[i,4]) 35 | area_r1 = dets[i,2]*dets[i,3] 36 | for _j in range(_i+1,ndets): 37 | #tic = time.time() 38 | j = order[_j] 39 | if suppressed[j] == 1: 40 | continue 41 | r2 = ((dets[j,0],dets[j,1]),(dets[j,3],dets[j,2]),dets[j,4]) 42 | area_r2 = dets[j,2]*dets[j,3] 43 | ovr = 0.0 44 | #+++ 45 | #d = math.sqrt((dets[i,0] - dets[j,0])**2 + (dets[i,1] - dets[j,1])**2) 46 | #d1 = math.sqrt(dets[i,2]**2 + dets[i,3]**2) 47 | #d2 = math.sqrt(dets[j,2]**2 + dets[j,3]**2) 48 | #if d < d1+d2: 49 | #--- 50 | int_pts = cv2.rotatedRectangleIntersection(r1, r2)[1] 51 | if None != int_pts: 52 | order_pts = cv2.convexHull(int_pts, returnPoints = True) 53 | int_area = cv2.contourArea(order_pts) 54 | ovr = int_area*1.0 / (area_r1+area_r2-int_area) 55 | 56 | 57 | 58 | 59 | 60 | if ovr>=threshold: 61 | suppressed[j]=1 62 | #print t1 - tic, t2 - t1, t3 - t2 63 | #print 64 | print(time.time() - tic) 65 | print("nms done") 66 | return keep 67 | 68 | 69 | 70 | if __name__ == "__main__": 71 | 72 | boxes = np.array([ 73 | [50, 50, 100, 100, 0,0.99], 74 | [60, 60, 100, 100, 0,0.88],#keep 0.68 75 | [50, 50, 100, 100, 45.0,0.66],#discard 0.70 76 | [200, 200, 100, 100, 0,0.77],#keep 0.0 77 | 78 | ]) 79 | 80 | #boxes = np.tile(boxes, (4500 / 4, 1)) 81 | 82 | #for ind in range(4500): 83 | # boxes[ind, 5] = 0 84 | 85 | a = rotate_cpu_nms(boxes, 0.7) 86 | 87 | print(boxes[a]) 88 | -------------------------------------------------------------------------------- /rotation/rotate_cpython_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | import cv2 4 | # note: cv2 is a plain Python module and cannot be cimported 5 | def rotate_cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 6 | cdef np.ndarray[np.float32_t, ndim=1] x_ctrs = dets[:, 0] 7 | cdef np.ndarray[np.float32_t, ndim=1] y_ctrs = dets[:, 1] 8 | cdef np.ndarray[np.float32_t, ndim=1] heights = dets[:, 2] 9 | cdef np.ndarray[np.float32_t, ndim=1] widths = dets[:, 3] 10 | cdef np.ndarray[np.float32_t, ndim=1] angles = dets[:, 4] 11 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 5] 12 | cdef
np.ndarray[np.float32_t, ndim=1] areas = heights * widths 13 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 14 | cdef int ndets = dets.shape[0] 15 | cdef np.ndarray[np.int_t, ndim=1] suppressed = np.zeros((ndets), dtype=np.int) 16 | # nominal indices 17 | cdef int _i, _j 18 | # sorted indices 19 | cdef int i, j 20 | # temp variables for box i's (the box currently under consideration) 21 | cdef np.float32_t ix_ctr, iy_ctr, ih, iw, ia, iarea 22 | # variables for computing overlap with box j (lower scoring box) 23 | cdef np.float32_t xx_ctr, yy_ctr, hh, ww, aa 24 | cdef np.float32_t inter, ovr 25 | keep = [] 26 | for _i in range(ndets): 27 | i = order[_i] 28 | if suppressed[i] == 1: 29 | continue 30 | keep.append(i) 31 | ix_ctr = x_ctrs[i] 32 | iy_ctr = y_ctrs[i] 33 | ih = heights[i] 34 | iw = widths[i] 35 | ia = angles[i] 36 | iarea = areas[i] 37 | r1 = ((ix_ctr, iy_ctr), (ih, iw), ia) 38 | for _j in range(_i+1,ndets): 39 | j = order[_j] 40 | if suppressed[j] == 1: 41 | continue 42 | xx_ctr = x_ctrs[j] 43 | yy_ctr = y_ctrs[j] 44 | hh = heights[j] 45 | ww = widths[j] 46 | aa = angles[j] 47 | r2 = ((xx_ctr,yy_ctr),(ww,hh),aa) 48 | ovr = 0.0 49 | int_pts = cv2.rotatedRectangleIntersection(r1, r2)[1] 50 | if None != int_pts: 51 | order_pts = cv2.convexHull(int_pts, returnPoints = True) 52 | inter = cv2.contourArea(order_pts) 53 | ovr = inter*1.0 / (iarea+areas[j]-inter) 54 | if ovr>=thresh: 55 | suppressed[j]=1 56 | return keep 57 | -------------------------------------------------------------------------------- /rotation/rotate_cython_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | import cv2 4 | def rotate_cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 5 | cdef np.ndarray[np.float32_t, ndim=1] x_ctrs = dets[:, 0] 6 | cdef np.ndarray[np.float32_t, ndim=1] y_ctrs = dets[:, 1] 7 | cdef np.ndarray[np.float32_t, ndim=1] heights = dets[:, 2] 8 | cdef np.ndarray[np.float32_t, ndim=1] widths = dets[:, 3] 9 | cdef np.ndarray[np.float32_t, ndim=1] angles = dets[:, 4] 10 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 5] 11 | cdef np.ndarray[np.float32_t, ndim=1] areas = heights * widths 12 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 13 | cdef int ndets = dets.shape[0] 14 | cdef np.ndarray[np.int_t, ndim=1] suppressed = np.zeros((ndets), dtype=np.int) 15 | # nominal indices 16 | cdef int _i, _j 17 | # sorted indices 18 | cdef int i, j 19 | # temp variables for box i's (the box currently under consideration) 20 | cdef np.float32_t ix_ctr, iy_ctr, ih, iw, ia, iarea 21 | # variables for computing overlap with box j (lower scoring box) 22 | cdef np.float32_t xx_ctr, yy_ctr, hh, ww, aa 23 | cdef np.float32_t inter, ovr 24 | keep = [] 25 | for _i in range(ndets): 26 | i = order[_i] 27 | if suppressed[i] == 1: 28 | continue 29 | keep.append(i) 30 | ix_ctr = x_ctrs[i] 31 | iy_ctr = y_ctrs[i] 32 | ih = heights[i] 33 | iw = widths[i] 34 | ia = angles[i] 35 | iarea = areas[i] 36 | r1 = ((ix_ctr, iy_ctr), (ih, iw), ia) 37 | for _j in range(_i+1,ndets): 38 | j = order[_j] 39 | if suppressed[j] == 1: 40 | continue 41 | xx_ctr = x_ctrs[j] 42 | yy_ctr = y_ctrs[j] 43 | hh = heights[j] 44 | ww = widths[j] 45 | aa = angles[j] 46 | r2 = ((xx_ctr,yy_ctr),(ww,hh),aa) 47 | ovr = 0.0 48 | int_pts = cv2.rotatedRectangleIntersection(r1, r2)[1] 49 | if None != int_pts: 50 | order_pts = cv2.convexHull(int_pts, returnPoints = True) 51 | inter = cv2.contourArea(order_pts)
52 | ovr = inter*1.0 / (iarea+areas[j]-inter) 53 | if ovr>=thresh: 54 | suppressed[j]=1 55 | return keep 56 | -------------------------------------------------------------------------------- /rotation/rotate_gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _rotate_nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /rotation/rotate_gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | assert sizeof(int) == sizeof(np.int32_t) 5 | 6 | cdef extern from "rotate_gpu_nms.hpp": 7 | void _rotate_nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 8 | 9 | def rotate_gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 10 | np.int32_t device_id=0): 11 | cdef int boxes_num = dets.shape[0] 12 | cdef int boxes_dim = dets.shape[1] 13 | cdef int num_out 14 | cdef np.ndarray[np.int32_t, ndim=1] \ 15 | keep = np.zeros(boxes_num, dtype=np.int32) 16 | cdef np.ndarray[np.float32_t, ndim=1] \ 17 | scores = dets[:, 5] 18 | cdef np.ndarray[np.int_t, ndim=1] \ 19 | order = scores.argsort()[::-1] 20 | cdef np.ndarray[np.float32_t, ndim=2] \ 21 | sorted_dets = dets[order, :] 22 | _rotate_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 23 | keep = keep[:num_out] 24 | return list(order[keep]) 25 | -------------------------------------------------------------------------------- /rotation/rotate_polygon_nms.hpp: -------------------------------------------------------------------------------- 1 | void _rotate_nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /rotation/rotate_polygon_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | assert sizeof(int) == sizeof(np.int32_t) 5 | 6 | cdef extern from "rotate_gpu_nms.hpp": 7 | void _rotate_nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 8 | 9 | def rotate_gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 10 | np.int32_t device_id=0): 11 | cdef int boxes_num = dets.shape[0] 12 | cdef int boxes_dim = dets.shape[1] 13 | cdef int num_out 14 | cdef np.ndarray[np.int32_t, ndim=1] \ 15 | keep = np.zeros(boxes_num, dtype=np.int32) 16 | cdef np.ndarray[np.float32_t, ndim=1] \ 17 | scores = dets[:, 5] 18 | cdef np.ndarray[np.int_t, ndim=1] \ 19 | order = scores.argsort()[::-1] 20 | cdef np.ndarray[np.float32_t, ndim=2] \ 21 | sorted_dets = dets[order, :] 22 | _rotate_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 23 | keep = keep[:num_out] 24 | return list(order[keep]) 25 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
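A usage sketch for the rotated NMS wrappers above, assuming the Cython extensions have been built (see `rotation_setup.py`). Rows of `dets` follow the `(x_ctr, y_ctr, height, width, angle, score)` layout documented in `rotate_cpu_nms.py`; the import path assumes the compiled module keeps the `rotation.rotate_gpu_nms` name.

```python
import numpy as np
from rotation.rotate_gpu_nms import rotate_gpu_nms  # compiled extension assumed

dets = np.array([
    [50,  50,  100, 100, 0.0, 0.99],
    [60,  60,  100, 100, 0.0, 0.88],   # heavy overlap with the first box
    [200, 200, 100, 100, 0.0, 0.77],   # disjoint, should survive
], dtype=np.float32)

keep = rotate_gpu_nms(dets, 0.7, device_id=0)
print(keep)  # indices of surviving boxes, highest-scoring first
```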
2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if torch.cuda.is_available() and CUDA_HOME is not None: 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "maskrcnn_benchmark._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="maskrcnn_benchmark", 61 | version="0.1", 62 | author="fmassa", 63 | url="https://github.com/facebookresearch/maskrcnn-benchmark", 64 | description="object detection in pytorch", 65 | packages=find_packages(exclude=("configs", "tests",)), 66 | # install_requires=requirements, 67 | ext_modules=get_extensions(), 68 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 69 | ) 70 | -------------------------------------------------------------------------------- /tests/test_metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
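A small sanity-check sketch for the build configured in `setup.py` above; it assumes `python setup.py build_ext install` (or `develop`) has already been run, and only verifies that the compiled `maskrcnn_benchmark._C` extension is importable.

```python
import torch
from torch.utils.cpp_extension import CUDA_HOME

from maskrcnn_benchmark import _C  # ImportError here means build_ext never ran

print("extension loaded from:", _C.__file__)
# setup.py switches to CUDAExtension only when both of these held at build time:
print("CUDA build expected:", torch.cuda.is_available() and CUDA_HOME is not None)
```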
2 | import unittest 3 | 4 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 5 | 6 | 7 | class TestMetricLogger(unittest.TestCase): 8 | def test_update(self): 9 | meter = MetricLogger() 10 | for i in range(10): 11 | meter.update(metric=float(i)) 12 | 13 | m = meter.meters["metric"] 14 | self.assertEqual(m.count, 10) 15 | self.assertEqual(m.total, 45) 16 | self.assertEqual(m.median, 4) 17 | self.assertEqual(m.avg, 4.5) 18 | 19 | def test_no_attr(self): 20 | meter = MetricLogger() 21 | _ = meter.meters 22 | _ = meter.delimiter 23 | def broken(): 24 | _ = meter.not_existent 25 | self.assertRaises(AttributeError, broken) 26 | 27 | if __name__ == "__main__": 28 | unittest.main() 29 | -------------------------------------------------------------------------------- /tools/cityscapes/instances2dict_with_polygons.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Convert instances from png files to a dictionary 4 | # This files is created according to https://github.com/facebookresearch/Detectron/issues/111 5 | 6 | from __future__ import print_function, absolute_import, division 7 | import os, sys 8 | 9 | sys.path.append( os.path.normpath( os.path.join( os.path.dirname( __file__ ) , '..' , 'helpers' ) ) ) 10 | from csHelpers import * 11 | 12 | # Cityscapes imports 13 | from cityscapesscripts.evaluation.instance import * 14 | from cityscapesscripts.helpers.csHelpers import * 15 | import cv2 16 | from maskrcnn_benchmark.utils import cv2_util 17 | 18 | 19 | def instances2dict_with_polygons(imageFileList, verbose=False): 20 | imgCount = 0 21 | instanceDict = {} 22 | 23 | if not isinstance(imageFileList, list): 24 | imageFileList = [imageFileList] 25 | 26 | if verbose: 27 | print("Processing {} images...".format(len(imageFileList))) 28 | 29 | for imageFileName in imageFileList: 30 | # Load image 31 | img = Image.open(imageFileName) 32 | 33 | # Image as numpy array 34 | imgNp = np.array(img) 35 | 36 | # Initialize label categories 37 | instances = {} 38 | for label in labels: 39 | instances[label.name] = [] 40 | 41 | # Loop through all instance ids in instance image 42 | for instanceId in np.unique(imgNp): 43 | if instanceId < 1000: 44 | continue 45 | instanceObj = Instance(imgNp, instanceId) 46 | instanceObj_dict = instanceObj.toDict() 47 | 48 | #instances[id2label[instanceObj.labelID].name].append(instanceObj.toDict()) 49 | if id2label[instanceObj.labelID].hasInstances: 50 | mask = (imgNp == instanceId).astype(np.uint8) 51 | contour, hier = cv2_util.findContours( 52 | mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 53 | 54 | polygons = [c.reshape(-1).tolist() for c in contour] 55 | instanceObj_dict['contours'] = polygons 56 | 57 | instances[id2label[instanceObj.labelID].name].append(instanceObj_dict) 58 | 59 | imgKey = os.path.abspath(imageFileName) 60 | instanceDict[imgKey] = instances 61 | imgCount += 1 62 | 63 | if verbose: 64 | print("\rImages Processed: {}".format(imgCount), end=' ') 65 | sys.stdout.flush() 66 | 67 | if verbose: 68 | print("") 69 | 70 | return instanceDict 71 | 72 | def main(argv): 73 | fileList = [] 74 | if (len(argv) > 2): 75 | for arg in argv: 76 | if ("png" in arg): 77 | fileList.append(arg) 78 | instances2dict_with_polygons(fileList, True) 79 | 80 | if __name__ == "__main__": 81 | main(sys.argv[1:]) 82 | --------------------------------------------------------------------------------
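Finally, a minimal sketch of the contour-to-polygon flattening used by `instances2dict_with_polygons` above: each OpenCV contour of shape `(N, 1, 2)` becomes a flat `[x1, y1, x2, y2, ...]` list, which is the format COCO-style annotations expect.

```python
import numpy as np

contour = np.array([[[8, 8]], [[8, 23]], [[23, 23]], [[23, 8]]])  # shape (4, 1, 2)
polygon = contour.reshape(-1).tolist()
print(polygon)  # [8, 8, 8, 23, 23, 23, 23, 8]
```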