├── .flake8 ├── .gitignore ├── INSTALL.md ├── LICENSE ├── MODEL_ZOO.md ├── README.md ├── configs ├── caffe2 │ ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml │ ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml │ ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml │ └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml ├── cityscapes │ ├── e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml ├── e2e_faster_rcnn_R_101_FPN_1x.yaml ├── e2e_faster_rcnn_R_50_C4_1x.yaml ├── e2e_faster_rcnn_R_50_C4_1x_ICDAR13.yaml ├── e2e_faster_rcnn_R_50_C4_1x_ICDAR13_test.yaml ├── e2e_faster_rcnn_R_50_FPN_1x.yaml ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml ├── e2e_mask_rcnn_R_101_FPN_1x.yaml ├── e2e_mask_rcnn_R_50_C4_1x.yaml ├── e2e_mask_rcnn_R_50_FPN_1x.yaml ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── e2e_rrpn_R_50_C4_1x_ICDAR13_15_trial.yaml ├── e2e_rrpn_R_50_C4_1x_ICDAR13_test.yaml ├── e2e_rrpn_R_50_C4_1x_ICDAR13_toy.yaml ├── gn_baselines │ ├── e2e_faster_rcnn_R_50_FPN_1x_gn.yaml │ ├── e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x_gn.yaml │ ├── e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ ├── scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml │ ├── scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml │ ├── scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml │ └── scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml ├── pascal_voc │ ├── e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml │ ├── e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml ├── quick_schedules │ ├── e2e_faster_rcnn_R_50_C4_quick.yaml │ ├── e2e_faster_rcnn_R_50_FPN_quick.yaml │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml │ ├── e2e_mask_rcnn_R_50_C4_quick.yaml │ ├── e2e_mask_rcnn_R_50_FPN_quick.yaml │ ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml │ ├── rpn_R_50_C4_quick.yaml │ └── rpn_R_50_FPN_quick.yaml └── rrpn │ ├── e2e_rrpn_R_50_C4_1x_ICDAR13_15_17_trial.yaml │ └── e2e_rrpn_R_50_C4_1x_ICDAR13_15_17_trial_again_test.yaml ├── demo ├── ICDAR_demo.py ├── README.md ├── RRPN_Demo.py ├── __init__.py ├── link_boxes.py ├── polygon_wrapper.py ├── predictor.py ├── results │ ├── rrpn1.png │ ├── rrpn2.png │ ├── rrpn3.png │ ├── rrpn4.png │ └── rrpn5.png ├── rrpn_e2e_infer.py └── webcam.py ├── docker ├── Dockerfile └── docker-jupyter │ ├── Dockerfile │ └── jupyter_notebook_config.py ├── maskrcnn_benchmark ├── __init__.py ├── config │ ├── __init__.py │ ├── defaults.py │ └── paths_catalog.py ├── csrc │ ├── ROIAlign.h │ ├── ROIPool.h │ ├── RROIAlign.h │ ├── cpu │ │ ├── ROIAlign_cpu.cpp │ │ ├── nms_cpu.cpp │ │ └── vision.h │ ├── cuda │ │ ├── ROIAlign_cuda.cu │ │ ├── ROIPool_cuda.cu │ │ ├── RROIAlign_cuda.cu │ │ ├── nms.cu │ │ └── vision.h │ ├── nms.h │ └── vision.cpp ├── data │ ├── README.md │ ├── __init__.py │ ├── build.py │ ├── collate_batch.py │ ├── datasets │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── concat_dataset.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── coco │ │ │ │ ├── __init__.py │ │ │ │ └── coco_eval.py │ │ │ └── voc │ │ │ │ ├── __init__.py │ │ │ │ └── voc_eval.py │ │ ├── icdar_series.py │ │ ├── list_dataset.py │ │ ├── rotation_mask_datasets.py │ │ ├── rotation_series.py │ │ ├── rrpn_e2e_series.py │ │ └── voc.py │ ├── icdar_series.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── grouped_batch_sampler.py │ │ └── iteration_based_batch_sampler.py │ └── transforms │ │ ├── 
__init__.py │ │ ├── build.py │ │ └── transforms.py ├── engine │ ├── __init__.py │ ├── inference.py │ └── trainer.py ├── layers │ ├── __init__.py │ ├── _utils.py │ ├── batch_norm.py │ ├── misc.py │ ├── nms.py │ ├── roi_align.py │ ├── roi_pool.py │ ├── rroi_align.py │ ├── rroi_pool.py │ └── smooth_l1_loss.py ├── modeling │ ├── __init__.py │ ├── arpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── rpn.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── fpn.py │ │ └── resnet.py │ ├── balanced_positive_negative_sampler.py │ ├── box_coder.py │ ├── detector │ │ ├── __init__.py │ │ ├── detectors.py │ │ ├── generalized_rcnn.py │ │ └── generalized_rrpn_rcnn.py │ ├── make_layers.py │ ├── matcher.py │ ├── poolers.py │ ├── rbox_coder.py │ ├── registry.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── box_head │ │ │ ├── __init__.py │ │ │ ├── box_head.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── roi_box_feature_extractors.py │ │ │ └── roi_box_predictors.py │ │ ├── mask_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── mask_head.py │ │ │ ├── roi_mask_feature_extractors.py │ │ │ └── roi_mask_predictors.py │ │ ├── rbox_head │ │ │ ├── __init__.py │ │ │ ├── box_head.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── roi_box_feature_extractors.py │ │ │ └── roi_box_predictors.py │ │ ├── rec_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── rec_head.py │ │ │ ├── roi_rec_feature_extractors.py │ │ │ └── roi_rec_predictors.py │ │ ├── rmask_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── mask_head.py │ │ │ ├── roi_mask_feature_extractors.py │ │ │ └── roi_mask_predictors.py │ │ ├── roi_heads.py │ │ └── rroi_heads.py │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── rpn.py │ ├── rrpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── rrpn.py │ └── utils.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── bounding_box.py │ ├── boxlist_ops.py │ ├── image_list.py │ ├── rboxlist_ops.py │ ├── segmentation_for_rbox.py │ └── segmentation_mask.py └── utils │ ├── README.md │ ├── __init__.py │ ├── c2_model_loading.py │ ├── checkpoint.py │ ├── collect_env.py │ ├── comm.py │ ├── cv2_util.py │ ├── env.py │ ├── imports.py │ ├── logger.py │ ├── metric_logger.py │ ├── miscellaneous.py │ ├── model_serialization.py │ ├── model_zoo.py │ ├── rec_utils.py │ ├── registry.py │ └── visualize.py ├── rotation ├── __init__.py ├── rbbox_overlaps.cpp ├── rbbox_overlaps.hpp ├── rbbox_overlaps.pyx ├── rbbox_overlaps_kernel.cu ├── rotate_circle_nms.c ├── rotate_circle_nms.pyx ├── rotate_cpu_nms.py ├── rotate_cpython_nms.pyx ├── rotate_cython_nms.c ├── rotate_cython_nms.pyx ├── rotate_gpu_nms.cpp ├── rotate_gpu_nms.hpp ├── rotate_gpu_nms.pyx ├── rotate_nms_kernel.cu ├── rotate_polygon_nms.cpp ├── rotate_polygon_nms.hpp ├── rotate_polygon_nms.pyx └── rotate_polygon_nms_kernel.cu ├── rotation_setup.py ├── setup.py ├── tests ├── checkpoint.py ├── test_data_samplers.py └── test_metric_logger.py └── tools ├── cityscapes ├── convert_cityscapes_to_coco.py └── instances2dict_with_polygons.py ├── test_net.py └── train_net.py /.flake8: -------------------------------------------------------------------------------- 1 | # This is an example .flake8 config, used when developing *Black* itself. 2 | # Keep in sync with setup.cfg which is used for source packages. 
3 | 4 | [flake8] 5 | ignore = E203, E266, E501, W503 6 | max-line-length = 80 7 | max-complexity = 18 8 | select = B,C,E,F,W,T4,B9 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # compilation and distribution 2 | __pycache__ 3 | _ext 4 | *.pyc 5 | *.so 6 | maskrcnn_benchmark.egg-info/ 7 | build/ 8 | dist/ 9 | 10 | # pytorch/python/numpy formats 11 | *.pth 12 | *.pkl 13 | *.npy 14 | 15 | # ipython/jupyter notebooks 16 | *.ipynb 17 | **/.ipynb_checkpoints/ 18 | 19 | # Editor temporaries 20 | *.swn 21 | *.swo 22 | *.swp 23 | *~ 24 | 25 | # Pycharm editor settings 26 | .idea 27 | 28 | # project dirs 29 | /datasets 30 | /models 31 | -------------------------------------------------------------------------------- /INSTALL.md: --------------------------------------------------------------------------------

## Installation

### Requirements:
- PyTorch 1.0 from a nightly release. Installation instructions can be found at https://pytorch.org/get-started/locally/
- torchvision from master
- cocoapi
- yacs
- matplotlib
- GCC >= 4.9
- (optional) OpenCV for the webcam demo

```bash
# first, make sure that your conda is set up properly with the right environment
# for that, check that `which conda`, `which pip` and `which python` point to the
# right paths. From a clean conda env, this is what you need to do

conda create --name rrpn_pytorch
source activate rrpn_pytorch

# this installs the right pip and dependencies for the fresh python
conda install ipython

# maskrcnn_benchmark and coco api dependencies
pip install ninja yacs cython matplotlib

# follow the PyTorch installation instructions at https://pytorch.org/get-started/locally/
# we give the instructions for CUDA 9.0
conda install pytorch

# install torchvision
cd ~/github
git clone https://github.com/pytorch/vision.git
cd vision
python setup.py install

# install pycocotools
cd ~/github
git clone https://github.com/cocodataset/cocoapi.git
cd cocoapi/PythonAPI
python setup.py build_ext install

# install PyTorch Detection
cd ~/github
git clone https://github.com/mjq11302010044/RRPN_pytorch.git
cd RRPN_pytorch
# the following will install the lib with
# symbolic links, so that you can modify
# the files if you want and won't need to
# rebuild it
python setup.py build develop

#-------
python rotation_setup.py install
mv build/lib/rotation/*.so ./rotation
#-------

# or if you are on macOS
# MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop
```
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Facebook 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following
conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

# RRPN_pytorch
RRPN in pytorch, implemented under the framework of Facebook's maskrcnn-benchmark: https://github.com/facebookresearch/maskrcnn-benchmark.
Its caffe version can be viewed at: https://github.com/mjq11302010044/RRPN.

This repo has been deprecated; please see our new repo RRPN++ (https://github.com/mjq11302010044/RRPN_plusplus).

![alt text](demo/results/rrpn3.png "Results from IC15 testing set")

## Highlights
- **From the original repo:** Now in PyTorch 1.0, and somewhat faster than the original repo in both training and inference.
- **Training and evaluation checked:** Tested on IC15 with training data from {IC13, IC15, IC17mlt}, reaching an F-score of 83% vs. 81% for the caffe repo.
- **What's new:** RRoI Pooling is replaced with RRoI Alignment (bilinear interpolation for sampling), the FPN structure is supported, and it is easy to swap in various backbones for different purposes.

## Installation

Check [INSTALL.md](INSTALL.md) for installation instructions.

## Configuring your dataset
- Your dataset path can be set in `$RRPN_ROOT/maskrcnn_benchmark/config/paths_catalog.py`. We implemented interfaces for {IC13, IC15, IC17mlt, LSVT, ArT} for common use (starting from line 96):
```python
...
"RRPN_train": {  # including IC13 and IC15
    'dataset_list':{
        # 'IC13': 'Your dataset path',
        ...
    },
    "split": 'train'
},
...
```
- Adding your own dataset?
You need to form a dict array as follows (a minimal loader sketch is given below, after the Training section):
```python
im_info = {
    'gt_classes': your class_id array,
    'max_classes': your class_id array,
    'image': path to access one image,
    'boxes': rotated boxes in {cx, cy, w, h, θ},
    'flipped': not supported, just False,
    'gt_overlaps': overlaps filled with 1 (gt with gt),
    'seg_areas': H * W for an rbox,
    'height': height of an image,
    'width': width of an image,
    'max_overlaps': overlaps filled with 1 (gt with gt),
    'rotated': just True
}
```
Examples can be seen in `$RRPN_ROOT/maskrcnn_benchmark/data/rotation_series.py`.
Your data API should be added to the variable `DATASET`:
```python
DATASET = {
    'IC13': get_ICDAR2013,
    'IC15': get_ICDAR2015_RRC_PICK_TRAIN,
    'IC17mlt': get_ICDAR2017_mlt,
    ...
    'Your Dataset Name': 'Your Dataset API'
}
```

## Training
```bash
# create your data cache directory
mkdir data_cache
```
```bash
# In your root of RRPN
python tools/train_net.py --config-file=configs/rrpn/e2e_rrpn_R_50_C4_1x_ICDAR13_15_17_trial.yaml
```
- Multi-GPU training has not been tested yet; be careful when using more than one GPU.
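To make the `im_info` format above concrete, here is a minimal sketch of a dataset API function that could be registered in `DATASET`. The directory layout, file naming, the single text class id, and the `gt_overlaps` shape are assumptions of this sketch rather than the repo's fixed API, so adapt the parsing to your own annotations:

```python
import os
import numpy as np
from PIL import Image


def get_MyDataset(dataset_dir):
    """Hypothetical loader: assumes `images/` holds the pictures and `gts/`
    holds one comma-separated 'cx,cy,w,h,theta' line per rotated box."""
    im_infos = []
    img_dir = os.path.join(dataset_dir, 'images')
    gt_dir = os.path.join(dataset_dir, 'gts')
    for name in sorted(os.listdir(img_dir)):
        img_path = os.path.join(img_dir, name)
        width, height = Image.open(img_path).size
        gt_path = os.path.join(gt_dir, os.path.splitext(name)[0] + '.txt')
        boxes = []
        with open(gt_path) as f:
            for line in f:
                cx, cy, w, h, theta = map(float, line.strip().split(','))
                boxes.append([cx, cy, w, h, theta])
        if not boxes:
            continue  # skip images without annotations
        boxes = np.array(boxes, dtype=np.float32)
        num = boxes.shape[0]
        gt_classes = np.ones(num, dtype=np.int64)  # one foreground class: text
        im_infos.append({
            'gt_classes': gt_classes,
            'max_classes': gt_classes,
            'image': img_path,
            'boxes': boxes,                                     # {cx, cy, w, h, θ}
            'flipped': False,                                   # flipping not supported
            'gt_overlaps': np.ones(num, dtype=np.float32),      # gt-vs-gt overlap = 1
            'seg_areas': boxes[:, 2] * boxes[:, 3],             # H * W per rbox
            'height': height,
            'width': width,
            'max_overlaps': np.ones(num, dtype=np.float32),
            'rotated': True,
        })
    return im_infos
```

It could then be registered as `'MyDataset': get_MyDataset` in the `DATASET` dict of `rotation_series.py`.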
## Testing
- Use `$RRPN_ROOT/demo/RRPN_Demo.py` to test the images you want. The demo will generate a text file with the detected coordinates for each image.
- Show the detected image by turning the variable `vis` to True.

## Final
- Enjoy the code.
- Please cite us if it works for your projects.
```
@misc{ma2019rrpn,
  author = {Jianqi Ma},
  title = {{RRPN in pytorch}},
  year = {2019},
  howpublished = {\url{https://github.com/mjq11302010044/RRPN_pytorch}},
}
@article{Jianqi17RRPN,
  author = {Jianqi Ma and Weiyuan Shao and Hao Ye and Li Wang and Hong Wang and Yingbin Zheng and Xiangyang Xue},
  title = {Arbitrary-Oriented Scene Text Detection via Rotation Proposals},
  journal = {IEEE Transactions on Multimedia},
  volume = {20},
  number = {11},
  pages = {3111-3122},
  year = {2018}
}
```
-------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 23 | TEST: ("coco_2014_minival",) 24 | DATALOADER: 25 | SIZE_DIVISIBILITY: 32 26 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x" 4 | DATASETS: 5 | TEST: ("coco_2014_minival",) 6 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 23 | TEST: ("coco_2014_minival",) 24 | DATALOADER: 25 | SIZE_DIVISIBILITY: 32 26 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT:
"catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | RESNETS: 23 | STRIDE_IN_1X1: False 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | DATASETS: 27 | TEST: ("coco_2014_minival",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TEST: ("coco_2014_minival",) 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x" 4 | ROI_MASK_HEAD: 5 | PREDICTOR: "MaskRCNNC4Predictor" 6 | SHARE_BOX_FEATURE_EXTRACTOR: True 7 | MASK_ON: True 8 | DATASETS: 9 | TEST: ("coco_2014_minival",) 10 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | 
POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TEST: ("coco_2014_minival",) 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | RESNETS: 31 | STRIDE_IN_1X1: False 32 | NUM_GROUPS: 32 33 | WIDTH_PER_GROUP: 8 34 | MASK_ON: True 35 | DATASETS: 36 | TEST: ("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /configs/cityscapes/e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | NUM_CLASSES: 9 23 | DATASETS: 24 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 25 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",) 26 | DATALOADER: 27 | SIZE_DIVISIBILITY: 32 28 | SOLVER: 29 | BASE_LR: 0.01 30 | WEIGHT_DECAY: 0.0001 31 | STEPS: (18000,) 32 | MAX_ITER: 24000 33 | -------------------------------------------------------------------------------- /configs/cityscapes/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | NUM_CLASSES: 9 23 | ROI_MASK_HEAD: 24 | 
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 34 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (18000,) 41 | MAX_ITER: 24000 42 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 23 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | SOLVER: 28 | BASE_LR: 0.02 29 | WEIGHT_DECAY: 0.0001 30 | STEPS: (60000, 80000) 31 | MAX_ITER: 90000 32 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | DATASETS: 8 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 9 | TEST: ("coco_2014_minival",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WEIGHT_DECAY: 0.0001 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | IMS_PER_BATCH: 8 16 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_C4_1x_ICDAR13.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | DATASETS: 8 | TRAIN: ("ICDAR2013Dataset", ) 9 | TEST: ("ICDAR2013Dataset",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WEIGHT_DECAY: 0.0001 13 | STEPS: (20000, 60000) 14 | MAX_ITER: 80000 15 | IMS_PER_BATCH: 1 16 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_C4_1x_ICDAR13_test.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "../model_0020000.pth" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | DATASETS: 8 | TRAIN: ("ICDAR2013Dataset", ) 9 | TEST: ("ICDAR2013Dataset",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WEIGHT_DECAY: 0.0001 13 | STEPS: (20000, 60000) 14 | MAX_ITER: 80000 15 | IMS_PER_BATCH: 4 16 | -------------------------------------------------------------------------------- 
/configs/e2e_faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 23 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | SOLVER: 28 | BASE_LR: 0.02 29 | WEIGHT_DECAY: 0.0001 30 | STEPS: (60000, 80000) 31 | MAX_ITER: 90000 32 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | RESNETS: 23 | STRIDE_IN_1X1: False 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.01 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (120000, 160000) 35 | MAX_ITER: 180000 36 | IMS_PER_BATCH: 8 37 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | BASE_LR: 0.02 38 | WEIGHT_DECAY: 0.0001 39 | STEPS: (60000, 80000) 40 | MAX_ITER: 
90000 41 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_MASK_HEAD: 8 | PREDICTOR: "MaskRCNNC4Predictor" 9 | SHARE_BOX_FEATURE_EXTRACTOR: True 10 | MASK_ON: True 11 | DATASETS: 12 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 13 | TEST: ("coco_2014_minival",) 14 | SOLVER: 15 | BASE_LR: 0.01 16 | WEIGHT_DECAY: 0.0001 17 | STEPS: (120000, 160000) 18 | MAX_ITER: 180000 19 | IMS_PER_BATCH: 8 20 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | BASE_LR: 0.02 38 | WEIGHT_DECAY: 0.0001 39 | STEPS: (60000, 80000) 40 | MAX_ITER: 90000 41 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | RESNETS: 31 | STRIDE_IN_1X1: False 32 | NUM_GROUPS: 32 33 | WIDTH_PER_GROUP: 8 34 | MASK_ON: True 35 | DATASETS: 36 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 37 | TEST: ("coco_2014_minival",) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.01 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: 
(120000, 160000) 44 | MAX_ITER: 180000 45 | IMS_PER_BATCH: 8 46 | -------------------------------------------------------------------------------- /configs/e2e_rrpn_R_50_C4_1x_ICDAR13_15_trial.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RRPN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | RPN_HEAD: "SingleConvRRPNHead" 8 | 9 | ROI_BOX_HEAD: 10 | NUM_CLASSES: 2 11 | ROI_HEADS: 12 | BATCH_SIZE_PER_IMAGE: 256 13 | 14 | DATASETS: 15 | TRAIN: ("RRPN_train", ) 16 | SOLVER: 17 | BASE_LR: 0.005 18 | WEIGHT_DECAY: 0.0001 19 | STEPS: (50000, 100000) 20 | MAX_ITER: 150000 21 | IMS_PER_BATCH: 1 22 | 23 | OUTPUT_DIR: './models/IC-13-15-Trial/' 24 | -------------------------------------------------------------------------------- /configs/e2e_rrpn_R_50_C4_1x_ICDAR13_test.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RRPN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | RPN_HEAD: "SingleConvRRPNHead" 8 | 9 | ROI_BOX_HEAD: 10 | NUM_CLASSES: 2 11 | ROI_HEADS: 12 | BATCH_SIZE_PER_IMAGE: 128 13 | 14 | DATASETS: 15 | TRAIN: ("RRPN_train", ) 16 | SOLVER: 17 | BASE_LR: 0.003 18 | WEIGHT_DECAY: 0.0001 19 | STEPS: (20000, 60000) 20 | MAX_ITER: 80000 21 | IMS_PER_BATCH: 1 22 | 23 | -------------------------------------------------------------------------------- /configs/e2e_rrpn_R_50_C4_1x_ICDAR13_toy.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RRPN" 3 | WEIGHT: "./model_0030000.pth" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | RPN_HEAD: "SingleConvRRPNHead" 8 | 9 | ROI_BOX_HEAD: 10 | NUM_CLASSES: 2 11 | 12 | DATALOADER: 13 | SIZE_DIVISIBILITY: 1 14 | 15 | DATASETS: 16 | TRAIN: ("RRPN_train", ) 17 | TEST: ("RRPN_train",) 18 | SOLVER: 19 | BASE_LR: 0.003 20 | WEIGHT_DECAY: 0.0001 21 | STEPS: (20000, 60000) 22 | MAX_ITER: 80000 23 | IMS_PER_BATCH: 1 24 | 25 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | RESNETS: # use GN for backbone 13 | TRANS_FUNC: "BottleneckWithGN" 14 | STEM_FUNC: "StemWithGN" 15 | FPN: 16 | USE_GN: True # use GN for FPN 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | BATCH_SIZE_PER_IMAGE: 512 27 | POSITIVE_FRACTION: 0.25 28 | ROI_BOX_HEAD: 29 | USE_GN: True # use GN for bbox head 30 | POOLER_RESOLUTION: 7 31 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 34 | PREDICTOR: "FPNPredictor" 35 | DATASETS: 36 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 37 | TEST: ("coco_2014_minival",) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | # Assume 8 gpus 42 | 
BASE_LR: 0.02 43 | WEIGHT_DECAY: 0.0001 44 | STEPS: (60000, 80000) 45 | MAX_ITER: 90000 46 | IMS_PER_BATCH: 16 47 | TEST: 48 | IMS_PER_BATCH: 8 49 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | RESNETS: # use GN for backbone 13 | TRANS_FUNC: "BottleneckWithGN" 14 | STEM_FUNC: "StemWithGN" 15 | FPN: 16 | USE_GN: True # use GN for FPN 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | BATCH_SIZE_PER_IMAGE: 512 27 | POSITIVE_FRACTION: 0.25 28 | ROI_BOX_HEAD: 29 | USE_GN: True # use GN for bbox head 30 | POOLER_RESOLUTION: 7 31 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | CONV_HEAD_DIM: 256 34 | NUM_STACKED_CONVS: 4 35 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 36 | PREDICTOR: "FPNPredictor" 37 | DATASETS: 38 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 39 | TEST: ("coco_2014_minival",) 40 | DATALOADER: 41 | SIZE_DIVISIBILITY: 32 42 | SOLVER: 43 | # Assume 8 gpus 44 | BASE_LR: 0.02 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (60000, 80000) 47 | MAX_ITER: 90000 48 | IMS_PER_BATCH: 16 49 | TEST: 50 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | RESNETS: # use GN for backbone 13 | TRANS_FUNC: "BottleneckWithGN" 14 | STEM_FUNC: "StemWithGN" 15 | FPN: 16 | USE_GN: True # use GN for FPN 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | BATCH_SIZE_PER_IMAGE: 512 27 | POSITIVE_FRACTION: 0.25 28 | ROI_BOX_HEAD: 29 | USE_GN: True # use GN for bbox head 30 | POOLER_RESOLUTION: 7 31 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 34 | PREDICTOR: "FPNPredictor" 35 | ROI_MASK_HEAD: 36 | USE_GN: True # use GN for mask head 37 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 38 | CONV_LAYERS: (256, 256, 256, 256) 39 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 40 | PREDICTOR: "MaskRCNNC4Predictor" 41 | POOLER_RESOLUTION: 14 42 | POOLER_SAMPLING_RATIO: 2 43 | RESOLUTION: 28 44 | SHARE_BOX_FEATURE_EXTRACTOR: False 45 | MASK_ON: True 46 | DATASETS: 47 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 48 | TEST: ("coco_2014_minival",) 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | # Assume 8 gpus 53 | BASE_LR: 0.02 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (60000, 80000) 
56 | MAX_ITER: 90000 57 | IMS_PER_BATCH: 16 58 | TEST: 59 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | RESNETS: # use GN for backbone 13 | TRANS_FUNC: "BottleneckWithGN" 14 | STEM_FUNC: "StemWithGN" 15 | FPN: 16 | USE_GN: True # use GN for FPN 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | BATCH_SIZE_PER_IMAGE: 512 27 | POSITIVE_FRACTION: 0.25 28 | ROI_BOX_HEAD: 29 | USE_GN: True # use GN for bbox head 30 | POOLER_RESOLUTION: 7 31 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | CONV_HEAD_DIM: 256 34 | NUM_STACKED_CONVS: 4 35 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 36 | PREDICTOR: "FPNPredictor" 37 | ROI_MASK_HEAD: 38 | USE_GN: True # use GN for mask head 39 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 40 | CONV_LAYERS: (256, 256, 256, 256) 41 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 42 | PREDICTOR: "MaskRCNNC4Predictor" 43 | POOLER_RESOLUTION: 14 44 | POOLER_SAMPLING_RATIO: 2 45 | RESOLUTION: 28 46 | SHARE_BOX_FEATURE_EXTRACTOR: False 47 | MASK_ON: True 48 | DATASETS: 49 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 50 | TEST: ("coco_2014_minival",) 51 | DATALOADER: 52 | SIZE_DIVISIBILITY: 32 53 | SOLVER: 54 | # Assume 8 gpus 55 | BASE_LR: 0.02 56 | WEIGHT_DECAY: 0.0001 57 | STEPS: (60000, 80000) 58 | MAX_ITER: 90000 59 | IMS_PER_BATCH: 16 60 | TEST: 61 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 13 | RESNETS: # use GN for backbone 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | DATASETS: 37 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 38 | TEST: ("coco_2014_minival",) 39 | DATALOADER: 40 | SIZE_DIVISIBILITY: 32 41 | SOLVER: 42 | # Assume 8 gpus 43 | BASE_LR: 0.02 44 | WEIGHT_DECAY: 0.0001 45 | STEPS: (210000, 250000) 46 | MAX_ITER: 270000 47 | 
IMS_PER_BATCH: 16 48 | TEST: 49 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 13 | RESNETS: # use GN for backbone 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | DATASETS: 39 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 40 | TEST: ("coco_2014_minival",) 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 8 gpus 45 | BASE_LR: 0.02 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (210000, 250000) 48 | MAX_ITER: 270000 49 | IMS_PER_BATCH: 16 50 | TEST: 51 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 13 | RESNETS: # use GN for backbone 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | ROI_MASK_HEAD: 37 | USE_GN: True # use GN for mask head 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | CONV_LAYERS: (256, 256, 256, 256) 40 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 41 | PREDICTOR: "MaskRCNNC4Predictor" 42 | POOLER_RESOLUTION: 14 43 | POOLER_SAMPLING_RATIO: 2 44 | RESOLUTION: 28 45 | SHARE_BOX_FEATURE_EXTRACTOR: False 46 | MASK_ON: True 47 | DATASETS: 48 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 49 | TEST: ("coco_2014_minival",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | # Assume 8 gpus 54 | BASE_LR: 0.02 55 | WEIGHT_DECAY: 0.0001 56 | STEPS: (210000, 250000) 57 | MAX_ITER: 
270000 58 | IMS_PER_BATCH: 16 59 | TEST: 60 | IMS_PER_BATCH: 8 61 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: 800 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | OUT_CHANNELS: 256 12 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 13 | RESNETS: # use GN for backbone 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | ROI_MASK_HEAD: 39 | USE_GN: True # use GN for mask head 40 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 41 | CONV_LAYERS: (256, 256, 256, 256) 42 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 43 | PREDICTOR: "MaskRCNNC4Predictor" 44 | POOLER_RESOLUTION: 14 45 | POOLER_SAMPLING_RATIO: 2 46 | RESOLUTION: 28 47 | SHARE_BOX_FEATURE_EXTRACTOR: False 48 | MASK_ON: True 49 | DATASETS: 50 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 51 | TEST: ("coco_2014_minival",) 52 | DATALOADER: 53 | SIZE_DIVISIBILITY: 32 54 | SOLVER: 55 | # Assume 8 gpus 56 | BASE_LR: 0.02 57 | WEIGHT_DECAY: 0.0001 58 | STEPS: (210000, 250000) 59 | MAX_ITER: 270000 60 | IMS_PER_BATCH: 16 61 | TEST: 62 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 300 7 | ANCHOR_SIZES: (128, 256, 512) 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 21 10 | DATASETS: 11 | TRAIN: ("voc_2007_train", "voc_2007_val") 12 | TEST: ("voc_2007_test",) 13 | SOLVER: 14 | BASE_LR: 0.001 15 | WEIGHT_DECAY: 0.0001 16 | STEPS: (50000, ) 17 | MAX_ITER: 70000 18 | IMS_PER_BATCH: 1 19 | TEST: 20 | IMS_PER_BATCH: 1 21 | -------------------------------------------------------------------------------- /configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 300 7 | ANCHOR_SIZES: (128, 256, 512) 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 21 10 | DATASETS: 11 | TRAIN: ("voc_2007_train", "voc_2007_val") 12 | TEST: ("voc_2007_test",) 13 | SOLVER: 14 | BASE_LR: 0.004 15 | WEIGHT_DECAY: 0.0001 16 | STEPS: (12500, ) 17 | MAX_ITER: 17500 18 | IMS_PER_BATCH: 4 19 | TEST: 20 | IMS_PER_BATCH: 4 21 | 
-------------------------------------------------------------------------------- /configs/pascal_voc/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | NUM_CLASSES: 21 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("voc_2012_train_cocostyle",) 34 | TEST: ("voc_2012_val_cocostyle",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (18000,) 41 | MAX_ITER: 24000 42 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | DATASETS: 10 | TRAIN: ("coco_2014_minival",) 11 | TEST: ("coco_2014_minival",) 12 | INPUT: 13 | MIN_SIZE_TRAIN: 600 14 | MAX_SIZE_TRAIN: 1000 15 | MIN_SIZE_TEST: 800 16 | MAX_SIZE_TEST: 1000 17 | SOLVER: 18 | BASE_LR: 0.005 19 | WEIGHT_DECAY: 0.0001 20 | STEPS: (1500,) 21 | MAX_ITER: 2000 22 | IMS_PER_BATCH: 2 23 | TEST: 24 | IMS_PER_BATCH: 2 25 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | BATCH_SIZE_PER_IMAGE: 256 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TRAIN: ("coco_2014_minival",) 25 | TEST: ("coco_2014_minival",) 26 | INPUT: 27 | MIN_SIZE_TRAIN: 600 28 | MAX_SIZE_TRAIN: 1000 29 | MIN_SIZE_TEST: 800 30 | MAX_SIZE_TEST: 1000 31 | DATALOADER: 32 | SIZE_DIVISIBILITY: 32 33 | SOLVER: 34 | BASE_LR: 0.005 35 | WEIGHT_DECAY: 0.0001 36 | STEPS: (1500,) 37 | MAX_ITER: 2000 38 | IMS_PER_BATCH: 4 39 | TEST: 40 | IMS_PER_BATCH: 2 41 | -------------------------------------------------------------------------------- 
/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | BATCH_SIZE_PER_IMAGE: 256 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | RESNETS: 24 | STRIDE_IN_1X1: False 25 | NUM_GROUPS: 32 26 | WIDTH_PER_GROUP: 8 27 | DATASETS: 28 | TRAIN: ("coco_2014_minival",) 29 | TEST: ("coco_2014_minival",) 30 | INPUT: 31 | MIN_SIZE_TRAIN: 600 32 | MAX_SIZE_TRAIN: 1000 33 | MIN_SIZE_TEST: 800 34 | MAX_SIZE_TEST: 1000 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.005 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (1500,) 41 | MAX_ITER: 2000 42 | IMS_PER_BATCH: 2 43 | TEST: 44 | IMS_PER_BATCH: 2 45 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | ROI_MASK_HEAD: 10 | PREDICTOR: "MaskRCNNC4Predictor" 11 | SHARE_BOX_FEATURE_EXTRACTOR: True 12 | MASK_ON: True 13 | DATASETS: 14 | TRAIN: ("coco_2014_minival",) 15 | TEST: ("coco_2014_minival",) 16 | INPUT: 17 | MIN_SIZE_TRAIN: 600 18 | MAX_SIZE_TRAIN: 1000 19 | MIN_SIZE_TEST: 800 20 | MAX_SIZE_TEST: 1000 21 | SOLVER: 22 | BASE_LR: 0.005 23 | WEIGHT_DECAY: 0.0001 24 | STEPS: (1500,) 25 | MAX_ITER: 2000 26 | IMS_PER_BATCH: 4 27 | TEST: 28 | IMS_PER_BATCH: 2 29 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | BATCH_SIZE_PER_IMAGE: 256 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2014_minival",) 34 | TEST: ("coco_2014_minival",) 35 | INPUT: 36 | MIN_SIZE_TRAIN: 600 37 | MAX_SIZE_TRAIN: 1000 38 | MIN_SIZE_TEST: 800 39 | MAX_SIZE_TEST: 1000 40 | 
DATALOADER: 41 | SIZE_DIVISIBILITY: 32 42 | SOLVER: 43 | BASE_LR: 0.005 44 | WEIGHT_DECAY: 0.0001 45 | STEPS: (1500,) 46 | MAX_ITER: 2000 47 | IMS_PER_BATCH: 4 48 | TEST: 49 | IMS_PER_BATCH: 2 50 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | BATCH_SIZE_PER_IMAGE: 256 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | RESNETS: 32 | STRIDE_IN_1X1: False 33 | NUM_GROUPS: 32 34 | WIDTH_PER_GROUP: 8 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2014_minival",) 38 | TEST: ("coco_2014_minival",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: 600 41 | MAX_SIZE_TRAIN: 1000 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1000 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | BASE_LR: 0.005 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (1500,) 50 | MAX_ITER: 2000 51 | IMS_PER_BATCH: 2 52 | TEST: 53 | IMS_PER_BATCH: 2 54 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 12000 7 | POST_NMS_TOP_N_TEST: 2000 8 | DATASETS: 9 | TRAIN: ("coco_2014_minival",) 10 | TEST: ("coco_2014_minival",) 11 | INPUT: 12 | MIN_SIZE_TRAIN: 600 13 | MAX_SIZE_TRAIN: 1000 14 | MIN_SIZE_TEST: 800 15 | MAX_SIZE_TEST: 1000 16 | SOLVER: 17 | BASE_LR: 0.005 18 | WEIGHT_DECAY: 0.0001 19 | STEPS: (1500,) 20 | MAX_ITER: 2000 21 | IMS_PER_BATCH: 4 22 | TEST: 23 | IMS_PER_BATCH: 2 24 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 2000 13 | FPN_POST_NMS_TOP_N_TEST: 2000 14 | DATASETS: 15 | TRAIN: ("coco_2014_minival",) 16 | TEST: ("coco_2014_minival",) 17 | INPUT: 18 | MIN_SIZE_TRAIN: 600 19 | MAX_SIZE_TRAIN: 1000 20 | MIN_SIZE_TEST: 800 21 | MAX_SIZE_TEST: 1000 22 | DATALOADER: 23 | SIZE_DIVISIBILITY: 32 24 | SOLVER: 25 | BASE_LR: 0.005 26 | WEIGHT_DECAY: 0.0001 27 | STEPS: (1500,) 28 | MAX_ITER: 2000 29 | IMS_PER_BATCH: 4 30 | TEST: 31 | 
IMS_PER_BATCH: 2 32 | -------------------------------------------------------------------------------- /configs/rrpn/e2e_rrpn_R_50_C4_1x_ICDAR13_15_17_trial.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RRPN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | RPN_HEAD: "SingleConvRRPNHead" 8 | 9 | ROI_BOX_HEAD: 10 | NUM_CLASSES: 2 11 | ROI_HEADS: 12 | BATCH_SIZE_PER_IMAGE: 256 13 | 14 | DATASETS: 15 | TRAIN: ("RRPN_train", ) 16 | SOLVER: 17 | BASE_LR: 0.005 18 | WEIGHT_DECAY: 0.0001 19 | STEPS: (50000, 100000) 20 | MAX_ITER: 140000 21 | IMS_PER_BATCH: 1 22 | 23 | OUTPUT_DIR: './models/IC-13-15-17-Trial-renew/' 24 | -------------------------------------------------------------------------------- /configs/rrpn/e2e_rrpn_R_50_C4_1x_ICDAR13_15_17_trial_again_test.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RRPN" 3 | WEIGHT: "./models/IC-13-15-17-Trial-multiscale-picked-ft/model_0200000.pth" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | RPN_HEAD: "SingleConvRRPNHead" 8 | 9 | ROI_BOX_HEAD: 10 | NUM_CLASSES: 2 11 | ROI_HEADS: 12 | BATCH_SIZE_PER_IMAGE: 256 13 | NMS: 0.3 14 | DETECTIONS_PER_IMG: 300 15 | 16 | DATALOADER: 17 | SIZE_DIVISIBILITY: 1 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /demo/ICDAR_demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['CUDA_VISIBLE_DEVICES'] = '6' 3 | import cv2 4 | import numpy as np 5 | from maskrcnn_benchmark.config import cfg 6 | from predictor import ICDARDemo 7 | 8 | 9 | def write_result_ICDAR(im_file, dets, result_dir): 10 | file_spl = im_file.split('/') 11 | file_name = file_spl[len(file_spl) - 1] 12 | file_name_arr = file_name.split(".") 13 | 14 | file_name_str = file_name_arr[0] 15 | 16 | if not os.path.isdir(result_dir): 17 | os.makedirs(result_dir) 18 | 19 | result = os.path.join(result_dir, "res_" + file_name_str + ".txt") 20 | 21 | return_bboxes = [] 22 | 23 | if not os.path.isfile(result): 24 | os.mknod(result) 25 | result_file = open(result, "w") 26 | 27 | result_str = "" 28 | 29 | for idx in range(len(dets)): 30 | 31 | l, t, r, b = dets[idx].astype(np.int32)[0:4] 32 | 33 | rotated_pts = [ 34 | [l, t], [r, t], [r, b], [l, b] 35 | ] 36 | 37 | #det_str = str(int(rotated_pts[0][0])) + "," + str(int(rotated_pts[0][1])) + "," + \ 38 | # str(int(rotated_pts[1][0])) + "," + str(int(rotated_pts[1][1])) + "," + \ 39 | # str(int(rotated_pts[2][0])) + "," + str(int(rotated_pts[2][1])) + "," + \ 40 | # str(int(rotated_pts[3][0])) + "," + str(int(rotated_pts[3][1])) + "\r\n" 41 | 42 | # rotated_pts = rotated_pts[:,0:2] 43 | 44 | # if (dets[idx][5] > threshold): 45 | # rotated_pts = over_bound_handle(rotated_pts, height, width) 46 | det_str = str(int(l)) + "," + str(int(t)) + "," + \ 47 | str(int(r)) + "," + str(int(b)) + "\r\n" 48 | 49 | result_str = result_str + det_str 50 | return_bboxes.append(dets[idx]) 51 | 52 | # print rotated_pts.shape 53 | 54 | result_file.write(result_str) 55 | result_file.close() 56 | 57 | return return_bboxes 58 | 59 | 60 | model_file = 'text_IC13' 61 | 62 | result_dir = os.path.join('results', model_file) 63 | 64 | if not os.path.isdir(result_dir): 65 | os.makedirs(result_dir) 66 | 67 | config_file = "../configs/e2e_faster_rcnn_R_50_C4_1x_ICDAR13_test.yaml" 68 | 
print('config_file:', config_file) 69 | # update the config options with the config file 70 | cfg.merge_from_file(config_file) 71 | # manual override some options 72 | cfg.merge_from_list(["MODEL.DEVICE", "cuda"]) 73 | 74 | coco_demo = ICDARDemo( 75 | cfg, 76 | min_image_size=800, 77 | confidence_threshold=0.7, 78 | ) 79 | # load image and then run prediction 80 | image_dir = '../../datasets/ICDAR13/Challenge2_Test_Task12_Images/' 81 | 82 | imlist = os.listdir(image_dir) 83 | 84 | for image in imlist: 85 | impath = os.path.join(image_dir, image) 86 | print('image:', impath) 87 | img = cv2.imread(impath) 88 | predictions, bounding_boxes = coco_demo.run_on_opencv_image(img) 89 | # print('predictions:', predictions.shape) 90 | 91 | bboxes_np = bounding_boxes.bbox.data.cpu().numpy() 92 | write_result_ICDAR(image[:-4], bboxes_np, result_dir) 93 | #cv2.imshow('win', predictions) 94 | #cv2.waitKey(0) -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | ## Webcam and Jupyter notebook demo 2 | 3 | This folder contains a simple webcam demo that illustrates how you can use `maskrcnn_benchmark` for inference. 4 | 5 | 6 | ### With your preferred environment 7 | 8 | You can start it by running it from this folder, using one of the following commands: 9 | ```bash 10 | # by default, it runs on the GPU 11 | # for best results, use min-image-size 800 12 | python webcam.py --min-image-size 800 13 | # can also run it on the CPU 14 | python webcam.py --min-image-size 300 MODEL.DEVICE cpu 15 | # or change the model that you want to use 16 | python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu 17 | # in order to see the probability heatmaps, pass --show-mask-heatmaps 18 | python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu 19 | ``` 20 | 21 | ### With Docker 22 | 23 | Build the image with the tag `maskrcnn-benchmark` (check [INSTALL.md](../INSTALL.md) for instructions) 24 | 25 | Adjust permissions of the X server host (be careful with this step, refer to 26 | [here](http://wiki.ros.org/docker/Tutorials/GUI) for alternatives) 27 | 28 | ```bash 29 | xhost + 30 | ``` 31 | 32 | Then run a container with the demo: 33 | 34 | ``` 35 | docker run --rm -it \ 36 | -e DISPLAY=${DISPLAY} \ 37 | --privileged \ 38 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 39 | --device=/dev/video0:/dev/video0 \ 40 | --ipc=host maskrcnn-benchmark \ 41 | python demo/webcam.py --min-image-size 300 42 | ``` 43 | 44 | **DISCLAIMER:** *This was tested for an Ubuntu 16.04 machine, 45 | the volume mapping may vary depending on your platform* 46 | -------------------------------------------------------------------------------- /demo/RRPN_Demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from maskrcnn_benchmark.config import cfg 5 | from demo.predictor import ICDARDemo, RRPNDemo 6 | from maskrcnn_benchmark.utils.visualize import vis_image, write_result_ICDAR_RRPN2polys, zip_dir 7 | from PIL import Image 8 | import time 9 | 10 | 11 | config_file = "./configs/rrpn/e2e_rrpn_R_50_C4_1x_ICDAR13_15_17_trial_again_test.yaml" 12 | 13 | # update the config options with the config file 14 | cfg.merge_from_file(config_file) 15 | # manual override some options 16 | cfg.merge_from_list(["MODEL.DEVICE", "cuda"]) 17 | # cfg.freeze() 18 | # cfg.MODEL.WEIGHT 
= 'models/IC-13-15-17-Trial/model_0155000.pth' 19 | 20 | result_dir = os.path.join('results', config_file.split('/')[-1].split('.')[0], cfg.MODEL.WEIGHT.split('/')[-1].split('.')[0]) 21 | 22 | if not os.path.isdir(result_dir): 23 | os.makedirs(result_dir) 24 | 25 | 26 | coco_demo = RRPNDemo( 27 | cfg, 28 | min_image_size=1000, 29 | confidence_threshold=0.85, 30 | ) 31 | 32 | dataset_name = 'IC15' 33 | 34 | testing_dataset = { 35 | 'IC13': { 36 | 'testing_image_dir': '../datasets/ICDAR13/Challenge2_Test_Task12_Images', 37 | 'test_vocal_dir': '../datasets/ICDAR13/ch2_test_vocabularies_per_image' 38 | }, 39 | 'IC15': { 40 | 'testing_image_dir': '../datasets/ICDAR15/ch4_test_images', 41 | 'test_vocal_dir': '../datasets/ICDAR15/ch4_test_vocabularies_per_image' 42 | }, 43 | } 44 | 45 | image_dir = testing_dataset[dataset_name]['testing_image_dir'] 46 | vocab_dir = testing_dataset[dataset_name]['test_vocal_dir'] 47 | 48 | # load image and then run prediction 49 | # image_dir = '../datasets/ICDAR13/Challenge2_Test_Task12_Images/' 50 | imlist = os.listdir(image_dir) 51 | 52 | print('************* META INFO ***************') 53 | print('config_file:', config_file) 54 | print('result_dir:', result_dir) 55 | print('image_dir:', image_dir) 56 | print('weights:', cfg.MODEL.WEIGHT) 57 | print('***************************************') 58 | 59 | vis = True 60 | 61 | num_images = len(imlist) 62 | cnt = 0 63 | 64 | for image in imlist: 65 | impath = os.path.join(image_dir, image) 66 | # print('image:', impath) 67 | img = cv2.imread(impath) 68 | cnt += 1 69 | tic = time.time() 70 | predictions, bounding_boxes = coco_demo.run_on_opencv_image(img) 71 | toc = time.time() 72 | 73 | print('time cost:', str(toc - tic)[:6], '|', str(cnt) + '/' + str(num_images)) 74 | 75 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 76 | bboxes_np = bounding_boxes.bbox.data.cpu().numpy() 77 | bboxes_np[:, 2:4] /= cfg.MODEL.RRPN.GT_BOX_MARGIN 78 | 79 | width, height = bounding_boxes.size 80 | 81 | if vis: 82 | pil_image = vis_image(Image.fromarray(img), bboxes_np) 83 | pil_image.show() 84 | time.sleep(10) 85 | write_result_ICDAR_RRPN2polys(image[:-4], bboxes_np, threshold=0.7, result_dir=result_dir, height=height, width=width) 86 | #im_file, dets, threshold, result_dir, height, width 87 | #cv2.imshow('win', predictions) 88 | #cv2.waitKey(0) 89 | 90 | ''' 91 | if dataset_name == 'IC15': 92 | zipfilename = os.path.join(result_dir, 'submit_' + config_file.split('/')[-1].split('.')[0] + '_' + cfg.MODEL.WEIGHT.split('/')[-1].split('.')[0] + '.zip') 93 | if os.path.isfile(zipfilename): 94 | print('Zip file exists, removing it...') 95 | os.remove(zipfilename) 96 | zip_dir(result_dir, zipfilename) 97 | comm = 'curl -i -F "submissionFile=@' + zipfilename + '" http://127.0.0.1:8080/evaluate' 98 | # print(comm) 99 | print(os.popen(comm, 'r')) 100 | else: 101 | pass 102 | ''' -------------------------------------------------------------------------------- /demo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/demo/__init__.py -------------------------------------------------------------------------------- /demo/results/rrpn1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/demo/results/rrpn1.png 
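Annotation (added; not part of the repository): `RRPN_Demo.py` above works with 5-parameter rotated boxes, and it divides the predicted width/height by `cfg.MODEL.RRPN.GT_BOX_MARGIN` because ground-truth boxes appear to be enlarged by that margin during training (see `RandomRotation(..., gt_margin=...)` in `data/transforms/build.py`). A rough sketch of turning such a box into the 4-point polygon that `write_result_ICDAR_RRPN2polys` emits — assuming the `(cx, cy, w, h, angle-in-degrees)` layout; `rbox_to_polygon` is a hypothetical helper, not a function from this repo:

```python
import cv2
import numpy as np

def rbox_to_polygon(rbox):
    # Hypothetical helper: one (cx, cy, w, h, angle) box, angle in degrees,
    # to its 4 corner points as a (4, 2) integer array.
    cx, cy, w, h, angle = (float(v) for v in rbox[:5])
    pts = cv2.boxPoints(((cx, cy), (w, h), angle))  # corners of the RotatedRect
    return np.round(pts).astype(np.int32)

# As in the demo: undo the training-time margin, then export polygons.
# bboxes_np[:, 2:4] /= cfg.MODEL.RRPN.GT_BOX_MARGIN
# polys = [rbox_to_polygon(b) for b in bboxes_np]
```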
-------------------------------------------------------------------------------- /demo/results/rrpn2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/demo/results/rrpn2.png -------------------------------------------------------------------------------- /demo/results/rrpn3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/demo/results/rrpn3.png -------------------------------------------------------------------------------- /demo/results/rrpn4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/demo/results/rrpn4.png -------------------------------------------------------------------------------- /demo/results/rrpn5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/demo/results/rrpn5.png -------------------------------------------------------------------------------- /demo/webcam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import argparse 3 | import cv2 4 | 5 | from maskrcnn_benchmark.config import cfg 6 | from predictor import COCODemo 7 | 8 | import time 9 | 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo") 13 | parser.add_argument( 14 | "--config-file", 15 | default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml", 16 | metavar="FILE", 17 | help="path to config file", 18 | ) 19 | parser.add_argument( 20 | "--confidence-threshold", 21 | type=float, 22 | default=0.7, 23 | help="Minimum score for the prediction to be shown", 24 | ) 25 | parser.add_argument( 26 | "--min-image-size", 27 | type=int, 28 | default=224, 29 | help="Smallest size of the image to feed to the model. 
" 30 | "Model was trained with 800, which gives best results", 31 | ) 32 | parser.add_argument( 33 | "--show-mask-heatmaps", 34 | dest="show_mask_heatmaps", 35 | help="Show a heatmap probability for the top masks-per-dim masks", 36 | action="store_true", 37 | ) 38 | parser.add_argument( 39 | "--masks-per-dim", 40 | type=int, 41 | default=2, 42 | help="Number of heatmaps per dimension to show", 43 | ) 44 | parser.add_argument( 45 | "opts", 46 | help="Modify model config options using the command-line", 47 | default=None, 48 | nargs=argparse.REMAINDER, 49 | ) 50 | 51 | args = parser.parse_args() 52 | 53 | # load config from file and command-line arguments 54 | cfg.merge_from_file(args.config_file) 55 | cfg.merge_from_list(args.opts) 56 | cfg.freeze() 57 | 58 | # prepare object that handles inference plus adds predictions on top of image 59 | coco_demo = COCODemo( 60 | cfg, 61 | confidence_threshold=args.confidence_threshold, 62 | show_mask_heatmaps=args.show_mask_heatmaps, 63 | masks_per_dim=args.masks_per_dim, 64 | min_image_size=args.min_image_size, 65 | ) 66 | 67 | cam = cv2.VideoCapture(0) 68 | while True: 69 | start_time = time.time() 70 | ret_val, img = cam.read() 71 | composite = coco_demo.run_on_opencv_image(img) 72 | print("Time: {:.2f} s / img".format(time.time() - start_time)) 73 | cv2.imshow("COCO detections", composite) 74 | if cv2.waitKey(1) == 27: 75 | break # esc to quit 76 | cv2.destroyAllWindows() 77 | 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CUDA="9.0" 2 | ARG CUDNN="7" 3 | 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04 5 | 6 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 7 | 8 | # install basics 9 | RUN apt-get update -y \ 10 | && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++ \ 11 | && apt-get install -y libglib2.0-0 libsm6 libxext6 libxrender-dev 12 | 13 | # Install Miniconda 14 | RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 15 | && chmod +x /miniconda.sh \ 16 | && /miniconda.sh -b -p /miniconda \ 17 | && rm /miniconda.sh 18 | 19 | ENV PATH=/miniconda/bin:$PATH 20 | 21 | # Create a Python 3.6 environment 22 | RUN /miniconda/bin/conda install -y conda-build \ 23 | && /miniconda/bin/conda create -y --name py36 python=3.6.7 \ 24 | && /miniconda/bin/conda clean -ya 25 | 26 | ENV CONDA_DEFAULT_ENV=py36 27 | ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV 28 | ENV PATH=$CONDA_PREFIX/bin:$PATH 29 | ENV CONDA_AUTO_UPDATE_CONDA=false 30 | 31 | RUN conda install -y ipython 32 | RUN pip install ninja yacs cython matplotlib opencv-python 33 | 34 | # Install PyTorch 1.0 Nightly and OpenCV 35 | RUN conda install -y pytorch-nightly -c pytorch \ 36 | && conda clean -ya 37 | 38 | # Install TorchVision master 39 | RUN git clone https://github.com/pytorch/vision.git \ 40 | && cd vision \ 41 | && python setup.py install 42 | 43 | # install pycocotools 44 | RUN git clone https://github.com/cocodataset/cocoapi.git \ 45 | && cd cocoapi/PythonAPI \ 46 | && python setup.py build_ext install 47 | 48 | # install PyTorch Detection 49 | RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \ 50 | && cd maskrcnn-benchmark \ 51 | && python setup.py build develop 52 | 53 | WORKDIR /maskrcnn-benchmark 54 | 
-------------------------------------------------------------------------------- /docker/docker-jupyter/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CUDA="9.0" 2 | ARG CUDNN="7" 3 | 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04 5 | 6 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 7 | 8 | # install basics 9 | RUN apt-get update -y \ 10 | && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++ 11 | 12 | # Install Miniconda 13 | RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 14 | && chmod +x /miniconda.sh \ 15 | && /miniconda.sh -b -p /miniconda \ 16 | && rm /miniconda.sh 17 | 18 | ENV PATH=/miniconda/bin:$PATH 19 | 20 | # Create a Python 3.6 environment 21 | RUN /miniconda/bin/conda install -y conda-build \ 22 | && /miniconda/bin/conda create -y --name py36 python=3.6.7 \ 23 | && /miniconda/bin/conda clean -ya 24 | 25 | ENV CONDA_DEFAULT_ENV=py36 26 | ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV 27 | ENV PATH=$CONDA_PREFIX/bin:$PATH 28 | ENV CONDA_AUTO_UPDATE_CONDA=false 29 | 30 | RUN conda install -y ipython 31 | RUN pip install ninja yacs cython matplotlib jupyter 32 | 33 | # Install PyTorch 1.0 Nightly and OpenCV 34 | RUN conda install -y pytorch-nightly -c pytorch \ 35 | && conda install -y opencv -c menpo \ 36 | && conda clean -ya 37 | 38 | WORKDIR /root 39 | 40 | USER root 41 | 42 | RUN mkdir /notebooks 43 | 44 | WORKDIR /notebooks 45 | 46 | # Install TorchVision master 47 | RUN git clone https://github.com/pytorch/vision.git \ 48 | && cd vision \ 49 | && python setup.py install 50 | 51 | # install pycocotools 52 | RUN git clone https://github.com/cocodataset/cocoapi.git \ 53 | && cd cocoapi/PythonAPI \ 54 | && python setup.py build_ext install 55 | 56 | # install PyTorch Detection 57 | RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \ 58 | && cd maskrcnn-benchmark \ 59 | && python setup.py build develop 60 | 61 | RUN jupyter notebook --generate-config 62 | 63 | ENV CONFIG_PATH="/root/.jupyter/jupyter_notebook_config.py" 64 | 65 | COPY "jupyter_notebook_config.py" ${CONFIG_PATH} 66 | 67 | ENTRYPOINT ["sh", "-c", "jupyter notebook --allow-root -y --no-browser --ip=0.0.0.0 --config=${CONFIG_PATH}"] 68 | -------------------------------------------------------------------------------- /docker/docker-jupyter/jupyter_notebook_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from IPython.lib import passwd 3 | 4 | #c = c # pylint:disable=undefined-variable 5 | c = get_config() 6 | c.NotebookApp.ip = '0.0.0.0' 7 | c.NotebookApp.port = int(os.getenv('PORT', 8888)) 8 | c.NotebookApp.open_browser = False 9 | 10 | # sets a password if PASSWORD is set in the environment 11 | if 'PASSWORD' in os.environ: 12 | password = os.environ['PASSWORD'] 13 | if password: 14 | c.NotebookApp.password = passwd(password) 15 | else: 16 | c.NotebookApp.password = '' 17 | c.NotebookApp.token = '' 18 | del os.environ['PASSWORD'] 19 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .defaults import _C as cfg 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
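// Annotation (not in the original header): for ROIPool only the CUDA kernels
// exist; the dispatcher below raises "Not implemented on the CPU" when the
// input tensor is not on the GPU. The same pattern is used for RROIAlign next.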
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/RROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | std::tuple<at::Tensor, at::Tensor, at::Tensor> RROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return RROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | //return RROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor RROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const at::Tensor& con_idx_x, 30 | const at::Tensor& con_idx_y, 31 | const float spatial_scale, 32 | const int pooled_height, 33 | const int pooled_width, 34 | const int batch_size, 35 | const int channels, 36 | const int height, 37 | const int width) { 38 | if (grad.type().is_cuda()) { 39 | #ifdef WITH_CUDA 40 | return RROIAlign_backward_cuda(grad, rois, con_idx_x, con_idx_y, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 41 | #else 42 | AT_ERROR("Not compiled with GPU support"); 43 | #endif 44 | } 45 | AT_ERROR("Not implemented on the CPU"); 46 | } 47 | 48 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
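// Annotation (not in the original file): nms_cpu_kernel below is greedy NMS
// over score-sorted boxes, using the legacy Detectron "+1" convention when
// computing box areas and intersections (integer pixel coordinates).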
2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 14 | const at::Tensor& rois, 15 | const float spatial_scale, 16 | const int pooled_height, 17 | const int pooled_width, 18 | const int batch_size, 19 | const int channels, 20 | const int height, 21 | const int width, 22 | const int sampling_ratio); 23 | 24 | 25 | std::tuple<at::Tensor, at::Tensor, at::Tensor> RROIAlign_forward_cuda(const at::Tensor& input, 26 | const at::Tensor& rois, 27 | const float spatial_scale, 28 | const int pooled_height, 29 | const int pooled_width); 30 | 31 | at::Tensor RROIAlign_backward_cuda(const at::Tensor& grad, 32 | const at::Tensor& rois, 33 | const at::Tensor& con_idx_x, 34 | const at::Tensor& con_idx_y, 35 | const float spatial_scale, 36 | const int pooled_height, 37 | const int pooled_width, 38 | const int batch_size, 39 | const int channels, 40 | const int height, 41 | const int width); 42 | 43 | 44 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(const at::Tensor& input, 45 | const at::Tensor& rois, 46 | const float spatial_scale, 47 | const int pooled_height, 48 | const int pooled_width); 49 | 50 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 51 | const at::Tensor& input, 52 | const at::Tensor& rois, 53 | const at::Tensor& argmax, 54 | const float spatial_scale, 55 | const int pooled_height, 56 | const int pooled_width, 57 | const int batch_size, 58 | const int channels, 59 | const int height, 60 | const int width); 61 | 62 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 63 | 64 | 65 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 66 | const int height, 67 | const int width); 68 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
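// Annotation (not in the original file): the pybind11 bindings below are
// compiled into the `maskrcnn_benchmark._C` extension by setup.py; Python
// wrappers such as layers/nms.py and layers/roi_align.py import `_C` and
// call these functions directly.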
2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "RROIAlign.h" 6 | 7 | 8 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 9 | m.def("nms", &nms, "non-maximum suppression"); 10 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 11 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 12 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 13 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 14 | 15 | m.def("rroi_align_forward", &RROIAlign_forward, "RROIAlign_forward"); 16 | m.def("rroi_align_backward", &RROIAlign_backward, "RROIAlign_backward"); 17 | 18 | } 19 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/README.md: -------------------------------------------------------------------------------- 1 | # Setting Up Datasets 2 | This file describes how to perform training on other datasets. 3 | 4 | Only Pascal VOC dataset can be loaded from its original format and be outputted to Pascal style results currently. 5 | 6 | We expect the annotations from other datasets be converted to COCO json format, and 7 | the output will be in COCO-style. (i.e. AP, AP50, AP75, APs, APm, APl for bbox and segm) 8 | 9 | ## Creating Symlinks for PASCAL VOC 10 | 11 | We assume that your symlinked `datasets/voc/VOC` directory has the following structure: 12 | 13 | ``` 14 | VOC 15 | |_ JPEGImages 16 | | |_ .jpg 17 | | |_ ... 18 | | |_ .jpg 19 | |_ Annotations 20 | | |_ pascal_train.json (optional) 21 | | |_ pascal_val.json (optional) 22 | | |_ pascal_test.json (optional) 23 | | |_ .xml 24 | | |_ ... 25 | | |_ .xml 26 | |_ VOCdevkit 27 | ``` 28 | 29 | Create symlinks for `voc/VOC`: 30 | 31 | ``` 32 | cd ~/github/maskrcnn-benchmark 33 | mkdir -p datasets/voc/VOC 34 | ln -s /path/to/VOC /datasets/voc/VOC 35 | ``` 36 | Example configuration files for PASCAL VOC could be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/pascal_voc/). 37 | 38 | ### PASCAL VOC Annotations in COCO Format 39 | To output COCO-style evaluation result, PASCAL VOC annotations in COCO json format is required and could be downloaded from [here](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip) 40 | via http://cocodataset.org/#external. 41 | 42 | ## Creating Symlinks for Cityscapes: 43 | 44 | We assume that your symlinked `datasets/cityscapes` directory has the following structure: 45 | 46 | ``` 47 | cityscapes 48 | |_ images 49 | | |_ .jpg 50 | | |_ ... 51 | | |_ .jpg 52 | |_ annotations 53 | | |_ instanceonly_gtFile_train.json 54 | | |_ ... 55 | |_ raw 56 | |_ gtFine 57 | |_ ... 58 | |_ README.md 59 | ``` 60 | 61 | Create symlinks for `cityscapes`: 62 | 63 | ``` 64 | cd ~/github/maskrcnn-benchmark 65 | mkdir -p datasets/cityscapes 66 | ln -s /path/to/cityscapes datasets/data/cityscapes 67 | ``` 68 | 69 | ### Steps to convert Cityscapes Annotations to COCO Format 70 | 1. Download gtFine_trainvaltest.zip from https://www.cityscapes-dataset.com/downloads/ (login required) 71 | 2. Extract it to /path/to/gtFine_trainvaltest 72 | ``` 73 | gtFine_trainvaltest 74 | |_ gtFine 75 | ``` 76 | 3. 
Run the commands below to convert the annotations 77 | 78 | ``` 79 | cd ~/github 80 | git clone https://github.com/mcordts/cityscapesScripts.git 81 | cd cityscapesScripts 82 | cp ~/github/maskrcnn-benchmark/tools/cityscapes/instances2dict_with_polygons.py cityscapesscripts/evaluation 83 | python setup.py install 84 | cd ~/github/maskrcnn-benchmark 85 | python tools/cityscapes/convert_cityscapes_to_coco.py --datadir /path/to/gtFine_trainvaltest --outdir /path/to/cityscapes/annotations 86 | ``` 87 | 88 | Example configuration files for Cityscapes can be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/cityscapes/). 89 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_data_loader 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.structures.image_list import to_image_list 3 | 4 | 5 | class BatchCollator(object): 6 | """ 7 | From a list of samples from the dataset, 8 | returns the batched images and targets. 9 | This should be passed to the DataLoader 10 | """ 11 | 12 | def __init__(self, size_divisible=0): 13 | self.size_divisible = size_divisible 14 | 15 | def __call__(self, batch): 16 | transposed_batch = list(zip(*batch)) 17 | images = to_image_list(transposed_batch[0], self.size_divisible) 18 | targets = transposed_batch[1] 19 | img_ids = transposed_batch[2] 20 | return images, targets, img_ids 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .coco import COCODataset 3 | from .voc import PascalVOCDataset 4 | from .concat_dataset import ConcatDataset 5 | from .icdar_series import ICDAR2013Dataset 6 | from .rotation_series import RotationDataset 7 | from .rrpn_e2e_series import SpottingDataset 8 | from .rotation_mask_datasets import RotationMaskDataset 9 | 10 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset", 'ICDAR2013Dataset', 'RotationDataset', 'SpottingDataset', 'RotationMaskDataset'] 11 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
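# Annotation (not in the original file): besides dropping images without
# annotations, COCODataset below also removes images whose non-crowd boxes
# are all degenerate (width or height <= 1 pixel) -- see the ids_to_remove
# loop in __init__.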
2 | import torch 3 | import torchvision 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask 7 | 8 | 9 | class COCODataset(torchvision.datasets.coco.CocoDetection): 10 | def __init__( 11 | self, ann_file, root, remove_images_without_annotations, transforms=None 12 | ): 13 | super(COCODataset, self).__init__(root, ann_file) 14 | # sort indices for reproducible results 15 | self.ids = sorted(self.ids) 16 | 17 | # filter images without detection annotations 18 | if remove_images_without_annotations: 19 | self.ids = [ 20 | img_id 21 | for img_id in self.ids 22 | if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 23 | ] 24 | 25 | ids_to_remove = [] 26 | for img_id in self.ids: 27 | ann_ids = self.coco.getAnnIds(imgIds=img_id) 28 | anno = self.coco.loadAnns(ann_ids) 29 | if all( 30 | any(o <= 1 for o in obj["bbox"][2:]) 31 | for obj in anno 32 | if obj["iscrowd"] == 0 33 | ): 34 | ids_to_remove.append(img_id) 35 | 36 | self.ids = [ 37 | img_id for img_id in self.ids if img_id not in ids_to_remove 38 | ] 39 | 40 | self.json_category_id_to_contiguous_id = { 41 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 42 | } 43 | self.contiguous_category_id_to_json_id = { 44 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 45 | } 46 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 47 | self.transforms = transforms 48 | 49 | def __getitem__(self, idx): 50 | img, anno = super(COCODataset, self).__getitem__(idx) 51 | 52 | # filter crowd annotations 53 | # TODO might be better to add an extra field 54 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 55 | 56 | boxes = [obj["bbox"] for obj in anno] 57 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes 58 | target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") 59 | 60 | classes = [obj["category_id"] for obj in anno] 61 | classes = [self.json_category_id_to_contiguous_id[c] for c in classes] 62 | classes = torch.tensor(classes) 63 | target.add_field("labels", classes) 64 | 65 | masks = [obj["segmentation"] for obj in anno] 66 | masks = SegmentationMask(masks, img.size) 67 | target.add_field("masks", masks) 68 | 69 | target = target.clip_to_image(remove_empty=True) 70 | 71 | if self.transforms is not None: 72 | img, target = self.transforms(img, target) 73 | 74 | return img, target, idx 75 | 76 | def get_img_info(self, index): 77 | img_id = self.id_to_img_map[index] 78 | img_data = self.coco.imgs[img_id] 79 | return img_data 80 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
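# Annotation (not in the original file): get_idxs maps a global sample index
# to a (dataset_idx, sample_idx) pair by bisecting cumulative_sizes, so
# get_img_info can be answered by the right constituent dataset without
# touching the others.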
2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the sizes of the image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.data import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .voc import voc_evaluation 5 | 6 | 7 | def evaluate(dataset, predictions, output_folder, **kwargs): 8 | """evaluate dataset using different methods based on dataset type. 9 | Args: 10 | dataset: Dataset object 11 | predictions(list[BoxList]): each item in the list represents the 12 | prediction results for one image. 13 | output_folder: output folder, to save evaluation files or results. 14 | **kwargs: other args. 15 | Returns: 16 | evaluation result 17 | """ 18 | args = dict( 19 | dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs 20 | ) 21 | if isinstance(dataset, datasets.COCODataset): 22 | return coco_evaluation(**args) 23 | elif isinstance(dataset, datasets.PascalVOCDataset): 24 | return voc_evaluation(**args) 25 | else: 26 | dataset_name = dataset.__class__.__name__ 27 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 28 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation 2 | 3 | 4 | def coco_evaluation( 5 | dataset, 6 | predictions, 7 | output_folder, 8 | box_only, 9 | iou_types, 10 | expected_results, 11 | expected_results_sigma_tol, 12 | ): 13 | return do_coco_evaluation( 14 | dataset=dataset, 15 | predictions=predictions, 16 | box_only=box_only, 17 | output_folder=output_folder, 18 | iou_types=iou_types, 19 | expected_results=expected_results, 20 | expected_results_sigma_tol=expected_results_sigma_tol, 21 | ) 22 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .voc_eval import do_voc_evaluation 4 | 5 | 6 | def voc_evaluation(dataset, predictions, output_folder, box_only, **_): 7 | logger = logging.getLogger("maskrcnn_benchmark.inference") 8 | if box_only: 9 | logger.warning("voc evaluation doesn't support box_only, ignored.") 10 | logger.info("performing voc evaluation, ignored iou_types.") 11 | return do_voc_evaluation( 12 | dataset=dataset, 13 | predictions=predictions, 14 | output_folder=output_folder, 15 | logger=logger, 16 | ) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/list_dataset.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Simple dataset class that wraps a list of path names 4 | """ 5 | 6 | from PIL import Image 7 | 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList 9 | 10 | 11 | class ListDataset(object): 12 | def __init__(self, image_lists, transforms=None): 13 | self.image_lists = image_lists 14 | self.transforms = transforms 15 | 16 | def __getitem__(self, item): 17 | img = Image.open(self.image_lists[item]).convert("RGB") 18 | 19 | # dummy target 20 | w, h = img.size 21 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 22 | 23 | if self.transforms is not None: 24 | img, target = self.transforms(img, target) 25 | 26 | return img, target 27 | 28 | def __len__(self): 29 | return len(self.image_lists) 30 | 31 | def get_img_info(self, item): 32 | """ 33 | Return the image dimensions for the image, without 34 | loading and pre-processing it 35 | """ 36 | pass 37 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | It is especially useful in conjunction with 13 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 14 | process can pass a DistributedSampler instance as a DataLoader sampler, 15 | and load a subset of the original dataset that is exclusive to it. 16 | .. note:: 17 | Dataset is assumed to be of constant size. 18 | Arguments: 19 | dataset: Dataset used for sampling. 20 | num_replicas (optional): Number of processes participating in 21 | distributed training. 22 | rank (optional): Rank of the current process within num_replicas. 
23 | """ 24 | 25 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 26 | if num_replicas is None: 27 | if not dist.is_available(): 28 | raise RuntimeError("Requires distributed package to be available") 29 | num_replicas = dist.get_world_size() 30 | if rank is None: 31 | if not dist.is_available(): 32 | raise RuntimeError("Requires distributed package to be available") 33 | rank = dist.get_rank() 34 | self.dataset = dataset 35 | self.num_replicas = num_replicas 36 | self.rank = rank 37 | self.epoch = 0 38 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 39 | self.total_size = self.num_samples * self.num_replicas 40 | self.shuffle = True 41 | 42 | def __iter__(self): 43 | if self.shuffle: 44 | # deterministically shuffle based on epoch 45 | g = torch.Generator() 46 | g.manual_seed(self.epoch) 47 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 48 | else: 49 | indices = torch.arange(len(self.dataset)).tolist() 50 | 51 | # add extra samples to make it evenly divisible 52 | indices += indices[: (self.total_size - len(indices))] 53 | assert len(indices) == self.total_size 54 | 55 | # subsample 56 | offset = self.num_samples * self.rank 57 | indices = indices[offset : offset + self.num_samples] 58 | assert len(indices) == self.num_samples 59 | 60 | return iter(indices) 61 | 62 | def __len__(self): 63 | return self.num_samples 64 | 65 | def set_epoch(self, epoch): 66 | self.epoch = epoch 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | flip_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 10 | else: 11 | min_size = cfg.INPUT.MIN_SIZE_TEST 12 | max_size = cfg.INPUT.MAX_SIZE_TEST 13 | flip_prob = 0 14 | 15 | to_bgr255 = cfg.INPUT.TO_BGR255 16 | normalize_transform = T.Normalize( 17 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 18 | ) 19 | 20 | _aug_list = { 21 | "RRPN":T.Compose( 22 | [ 23 | T.Resize(min_size, max_size), 24 | T.RandomRotation(prob=1.0, r_range=cfg.INPUT.ROTATION_RANGE, fixed_angle=-1, gt_margin=cfg.MODEL.RRPN.GT_BOX_MARGIN), 25 | T.ToTensor(), 26 | # T.MixUp(mix_ratio=0.1), 27 | normalize_transform, 28 | ] 29 | ), 30 | "GeneralizedRCNN":T.Compose( 31 | [ 32 | T.Resize(min_size, max_size), 33 | T.RandomHorizontalFlip(flip_prob), 34 | T.ToTensor(), 35 | normalize_transform, 36 | ] 37 | ) 38 | } 39 | ''' 40 | if cfg.MODEL.META_ARCHITECTURE == "RRPN": 41 | transform = T.Compose( 42 | [ 43 | T.Resize(min_size, max_size), 44 | T.RandomRotation(prob=0, fixed_angle=30), 45 | T.ToTensor(), 46 | normalize_transform, 47 | ] 48 | ) 49 | else: 50 | transform = T.Compose( 51 | [ 52 | T.Resize(min_size, max_size), 53 | T.RandomHorizontalFlip(flip_prob), 54 | T.ToTensor(), 55 | normalize_transform, 56 | ] 57 | ) 58 | ''' 59 | return _aug_list[cfg.MODEL.META_ARCHITECTURE] 60 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d 5 | from .misc import Conv2d 6 | from .misc import ConvTranspose2d 7 | from .misc import interpolate 8 | from .nms import nms 9 | from .roi_align import ROIAlign 10 | from .rroi_align import RROIAlign 11 | from .roi_align import roi_align 12 | from .rroi_align import rroi_align 13 | from .roi_pool import ROIPool 14 | from .roi_pool import roi_pool 15 | from .smooth_l1_loss import smooth_l1_loss 16 | 17 | __all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", 18 | "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate", 19 | "FrozenBatchNorm2d", "RROIAlign" 20 | ] 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
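# Annotation (not in the original file): this module JIT-compiles the csrc/
# sources with torch.utils.cpp_extension.load (registering them under the
# name "torchvision") as an alternative to the ahead-of-time build in
# setup.py; -DWITH_CUDA is added only when CUDA is actually available.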
2 | import glob 3 | import os.path 4 | 5 | import torch 6 | 7 | try: 8 | from torch.utils.cpp_extension import load as load_ext 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | except ImportError: 11 | raise ImportError("The cpp layer extensions require PyTorch 0.4 or higher") 12 | 13 | 14 | def _load_C_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | this_dir = os.path.dirname(this_dir) 17 | this_dir = os.path.join(this_dir, "csrc") 18 | 19 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 22 | 23 | source = main_file + source_cpu 24 | 25 | extra_cflags = [] 26 | if torch.cuda.is_available() and CUDA_HOME is not None: 27 | source.extend(source_cuda) 28 | extra_cflags = ["-DWITH_CUDA"] 29 | source = [os.path.join(this_dir, s) for s in source] 30 | extra_include_paths = [this_dir] 31 | return load_ext( 32 | "torchvision", 33 | source, 34 | extra_cflags=extra_cflags, 35 | extra_include_paths=extra_include_paths, 36 | ) 37 | 38 | 39 | _C = _load_C_extensions() 40 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/batch_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class FrozenBatchNorm2d(nn.Module): 7 | """ 8 | BatchNorm2d where the batch statistics and the affine parameters 9 | are fixed 10 | """ 11 | 12 | def __init__(self, n): 13 | super(FrozenBatchNorm2d, self).__init__() 14 | self.register_buffer("weight", torch.ones(n)) 15 | self.register_buffer("bias", torch.zeros(n)) 16 | self.register_buffer("running_mean", torch.zeros(n)) 17 | self.register_buffer("running_var", torch.ones(n)) 18 | 19 | def forward(self, x): 20 | scale = self.weight * self.running_var.rsqrt() 21 | bias = self.bias - self.running_mean * scale 22 | scale = scale.reshape(1, -1, 1, 1) 23 | bias = bias.reshape(1, -1, 1, 1) 24 | return x * scale + bias 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | helper classes that support empty tensors on some nn functions. 4 | 5 | Ideally, add support directly in PyTorch to empty tensors in 6 | those functions.
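For instance, the Conv2d wrapper defined below degrades gracefully on an
empty batch (a minimal sketch; the shapes are illustrative only):

    conv = Conv2d(256, 256, kernel_size=3, padding=1)
    x = torch.empty(0, 256, 14, 14)
    conv(x).shape  # torch.Size([0, 256, 14, 14]); no convolution kernel is run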
7 | 8 | This can be removed once https://github.com/pytorch/pytorch/issues/12013 9 | is implemented 10 | """ 11 | 12 | import math 13 | import torch 14 | from torch.nn.modules.utils import _ntuple 15 | 16 | 17 | class _NewEmptyTensorOp(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, new_shape): 20 | ctx.shape = x.shape 21 | return x.new_empty(new_shape) 22 | 23 | @staticmethod 24 | def backward(ctx, grad): 25 | shape = ctx.shape 26 | return _NewEmptyTensorOp.apply(grad, shape), None 27 | 28 | 29 | 30 | class Conv2d(torch.nn.Conv2d): 31 | def forward(self, x): 32 | if x.numel() > 0: 33 | return super(Conv2d, self).forward(x) 34 | # get output shape 35 | 36 | output_shape = [ 37 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 38 | for i, p, di, k, d in zip( 39 | x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride 40 | ) 41 | ] 42 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape 43 | return _NewEmptyTensorOp.apply(x, output_shape) 44 | 45 | 46 | class ConvTranspose2d(torch.nn.ConvTranspose2d): 47 | def forward(self, x): 48 | if x.numel() > 0: 49 | return super(ConvTranspose2d, self).forward(x) 50 | # get output shape 51 | 52 | output_shape = [ 53 | (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op 54 | for i, p, di, k, d, op in zip( 55 | x.shape[-2:], 56 | self.padding, 57 | self.dilation, 58 | self.kernel_size, 59 | self.stride, 60 | self.output_padding, 61 | ) 62 | ] 63 | output_shape = [x.shape[0], self.bias.shape[0]] + output_shape 64 | return _NewEmptyTensorOp.apply(x, output_shape) 65 | 66 | 67 | def interpolate( 68 | input, size=None, scale_factor=None, mode="nearest", align_corners=None 69 | ): 70 | if input.numel() > 0: 71 | return torch.nn.functional.interpolate( 72 | input, size, scale_factor, mode, align_corners 73 | ) 74 | 75 | def _check_size_scale_factor(dim): 76 | if size is None and scale_factor is None: 77 | raise ValueError("either size or scale_factor should be defined") 78 | if size is not None and scale_factor is not None: 79 | raise ValueError("only one of size or scale_factor should be defined") 80 | if ( 81 | scale_factor is not None 82 | and isinstance(scale_factor, tuple) 83 | and len(scale_factor) != dim 84 | ): 85 | raise ValueError( 86 | "scale_factor shape must match input shape. " 87 | "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) 88 | ) 89 | 90 | def _output_size(dim): 91 | _check_size_scale_factor(dim) 92 | if size is not None: 93 | return size 94 | scale_factors = _ntuple(dim)(scale_factor) 95 | # math.floor might return float in py2.7 96 | return [ 97 | int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) 98 | ] 99 | 100 | output_shape = tuple(_output_size(2)) 101 | output_shape = input.shape[:-2] + output_shape 102 | return _NewEmptyTensorOp.apply(input, output_shape) 103 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from maskrcnn_benchmark import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIAlign(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None 45 | 46 | 47 | roi_align = _ROIAlign.apply 48 | 49 | 50 | class ROIAlign(nn.Module): 51 | def __init__(self, output_size, spatial_scale, sampling_ratio): 52 | super(ROIAlign, self).__init__() 53 | self.output_size = output_size 54 | self.spatial_scale = spatial_scale 55 | self.sampling_ratio = sampling_ratio 56 | 57 | def forward(self, input, rois): 58 | return roi_align( 59 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 60 | ) 61 | 62 | def __repr__(self): 63 | tmpstr = self.__class__.__name__ + "(" 64 | tmpstr += "output_size=" + str(self.output_size) 65 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 66 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 67 | tmpstr += ")" 68 | return tmpstr 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
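# Example: a small usage sketch for the ROIAlign module defined in
# roi_align.py above; the shapes and the stride-16 scale are illustrative,
# and rois follow the 5-column [batch_index, x1, y1, x2, y2] layout expected
# by the C++/CUDA kernels.
#
#     pooler = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=2)
#     features = torch.randn(1, 256, 50, 50, device="cuda")
#     rois = torch.tensor([[0., 16., 16., 160., 160.]], device="cuda")
#     out = pooler(features, rois)   # -> torch.Size([1, 256, 7, 7])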
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/rroi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
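# Example: ROIPool (defined in roi_pool.py above) shares ROIAlign's
# interface but max-pools each bin and caches the argmax for the backward
# pass; a sketch with illustrative shapes.
#
#     pooler = ROIPool(output_size=(7, 7), spatial_scale=1.0 / 16)
#     features = torch.randn(1, 256, 50, 50, device="cuda")
#     rois = torch.tensor([[0., 0., 0., 112., 112.]], device="cuda")
#     out = pooler(features, rois)   # -> torch.Size([1, 256, 7, 7])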
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # TODO maybe push this to nn? 6 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 7 | """ 8 | very similar to the smooth_l1_loss from pytorch, but with 9 | the extra beta parameter 10 | """ 11 | n = torch.abs(input - target) 12 | cond = n < beta 13 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 14 | if size_average: 15 | return loss.mean() 16 | return loss.sum() 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/arpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
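# Worked check for smooth_l1_loss above (beta = 1/9): errors below beta take
# the quadratic branch 0.5 * n**2 / beta, errors at or above it the linear
# branch n - 0.5 * beta. The numbers are illustrative.
#
#     input = torch.tensor([0.0, 1.0])
#     target = torch.tensor([0.05, 0.0])
#     # n = |input - target| = [0.05, 1.0]; beta ~= 0.111
#     # 0.05 <  beta -> 0.5 * 0.05**2 / beta ~= 0.0113
#     # 1.00 >= beta -> 1.0 - 0.5 * beta    ~= 0.9444
#     smooth_l1_loss(input, target, size_average=False)   # ~= 0.9557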
2 | from .backbone import build_backbone 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | 4 | from torch import nn 5 | 6 | from maskrcnn_benchmark.modeling import registry 7 | from maskrcnn_benchmark.modeling.make_layers import conv_with_kaiming_uniform 8 | from . import fpn as fpn_module 9 | from . import resnet 10 | 11 | 12 | @registry.BACKBONES.register("R-50-C4") 13 | @registry.BACKBONES.register("R-50-C5") 14 | @registry.BACKBONES.register("R-50-FP4P") 15 | @registry.BACKBONES.register("R-101-C4") 16 | @registry.BACKBONES.register("R-101-C5") 17 | def build_resnet_backbone(cfg): 18 | body = resnet.ResNet(cfg) 19 | model = nn.Sequential(OrderedDict([("body", body)])) 20 | return model 21 | 22 | 23 | @registry.BACKBONES.register("R-50-FPN") 24 | @registry.BACKBONES.register("R-101-FPN") 25 | def build_resnet_fpn_backbone(cfg): 26 | body = resnet.ResNet(cfg) 27 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 28 | out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS 29 | fpn = fpn_module.FPN( 30 | in_channels_list=[ 31 | in_channels_stage2, 32 | in_channels_stage2 * 2, 33 | in_channels_stage2 * 4, 34 | in_channels_stage2 * 8, 35 | ], 36 | out_channels=out_channels, 37 | conv_block=conv_with_kaiming_uniform( 38 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 39 | ), 40 | top_blocks=fpn_module.LastLevelMaxPool(), 41 | ) 42 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 43 | return model 44 | 45 | 46 | def build_backbone(cfg): 47 | assert cfg.MODEL.BACKBONE.CONV_BODY in registry.BACKBONES, \ 48 | "cfg.MODEL.BACKBONE.CONV_BODY: {} is not registered in registry".format( 49 | cfg.MODEL.BACKBONE.CONV_BODY 50 | ) 51 | return registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY](cfg) 52 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class BalancedPositiveNegativeSampler(object): 6 | """ 7 | This class samples batches, ensuring that they contain a fixed proportion of positives 8 | """ 9 | 10 | def __init__(self, batch_size_per_image, positive_fraction): 11 | """ 12 | Arguments: 13 | batch_size_per_image (int): number of elements to be selected per image 14 | positive_fraction (float): percentage of positive elements per batch 15 | """ 16 | self.batch_size_per_image = batch_size_per_image 17 | self.positive_fraction = positive_fraction 18 | 19 | def __call__(self, matched_idxs): 20 | """ 21 | Arguments: 22 | matched idxs: list of tensors containing -1, 0 or positive values. 23 | Each tensor corresponds to a specific image. 24 | -1 values are ignored, 0 are considered as negatives and > 0 as 25 | positives. 26 | 27 | Returns: 28 | pos_idx (list[tensor]) 29 | neg_idx (list[tensor]) 30 | 31 | Returns two lists of binary masks for each image. 32 | The first list contains the positive elements that were selected, 33 | and the second list the negative examples.
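Example (a small, fully determined case)::

    sampler = BalancedPositiveNegativeSampler(
        batch_size_per_image=4, positive_fraction=0.5
    )
    pos_masks, neg_masks = sampler([torch.tensor([-1, 0, 2, 1, 0])])
    # num_pos = 2 -> both positives (indices 2 and 3) are kept;
    # num_neg = 2 -> both zeros (indices 1 and 4) are kept;
    # pos_masks[0] == tensor([0, 0, 1, 1, 0], dtype=torch.uint8)
    # neg_masks[0] == tensor([0, 1, 0, 0, 1], dtype=torch.uint8)
    # the -1 entry is never selected.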
34 | """ 35 | pos_idx = [] 36 | neg_idx = [] 37 | for matched_idxs_per_image in matched_idxs: 38 | 39 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 40 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 41 | 42 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 43 | # protect against not enough positive examples 44 | num_pos = min(positive.numel(), num_pos) 45 | num_neg = self.batch_size_per_image - num_pos 46 | # protect against not enough negative examples 47 | num_neg = min(negative.numel(), num_neg) 48 | 49 | #print('matched_idxs_per_image:', matched_idxs_per_image) 50 | #print('positive:', positive, 'negative', negative) 51 | 52 | # randomly select positive and negative examples 53 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 54 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 55 | 56 | pos_idx_per_image = positive[perm1] 57 | neg_idx_per_image = negative[perm2] 58 | 59 | # create binary mask from indices 60 | pos_idx_per_image_mask = torch.zeros_like( 61 | matched_idxs_per_image, dtype=torch.uint8 62 | ) 63 | neg_idx_per_image_mask = torch.zeros_like( 64 | matched_idxs_per_image, dtype=torch.uint8 65 | ) 66 | pos_idx_per_image_mask[pos_idx_per_image] = 1 67 | neg_idx_per_image_mask[neg_idx_per_image] = 1 68 | 69 | pos_idx.append(pos_idx_per_image_mask) 70 | neg_idx.append(neg_idx_per_image_mask) 71 | 72 | return pos_idx, neg_idx 73 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class BoxCoder(object): 8 | """ 9 | This class encodes and decodes a set of bounding boxes into 10 | the representation used for training the regressors. 11 | """ 12 | 13 | def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)): 14 | """ 15 | Arguments: 16 | weights (4-element tuple) 17 | bbox_xform_clip (float) 18 | """ 19 | self.weights = weights 20 | self.bbox_xform_clip = bbox_xform_clip 21 | 22 | def encode(self, reference_boxes, proposals): 23 | """ 24 | Encode a set of proposals with respect to some 25 | reference boxes 26 | 27 | Arguments: 28 | reference_boxes (Tensor): reference boxes 29 | proposals (Tensor): boxes to be encoded 30 | """ 31 | 32 | TO_REMOVE = 1 # TODO remove 33 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 34 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 35 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 36 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 37 | 38 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 39 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 40 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 41 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 42 | 43 | wx, wy, ww, wh = self.weights 44 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 45 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 46 | targets_dw = ww * torch.log(gt_widths / ex_widths) 47 | targets_dh = wh * torch.log(gt_heights / ex_heights) 48 | 49 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 50 | return targets 51 | 52 | def decode(self, rel_codes, boxes): 53 | """ 54 | From a set of original boxes and encoded relative box offsets, 55 | get the decoded boxes. 
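A round-trip sketch; the (10., 10., 5., 5.) weights are an assumption
borrowed from common Faster R-CNN head settings, not a value read from
this file::

    coder = BoxCoder(weights=(10., 10., 5., 5.))
    deltas = coder.encode(gt_boxes, proposals)   # both (N, 4) xyxy tensors
    restored = coder.decode(deltas, proposals)   # recovers gt_boxes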
56 | 57 | Arguments: 58 | rel_codes (Tensor): encoded boxes 59 | boxes (Tensor): reference boxes. 60 | """ 61 | 62 | boxes = boxes.to(rel_codes.dtype) 63 | 64 | TO_REMOVE = 1 # TODO remove 65 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 66 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 67 | ctr_x = boxes[:, 0] + 0.5 * widths 68 | ctr_y = boxes[:, 1] + 0.5 * heights 69 | 70 | wx, wy, ww, wh = self.weights 71 | dx = rel_codes[:, 0::4] / wx 72 | dy = rel_codes[:, 1::4] / wy 73 | dw = rel_codes[:, 2::4] / ww 74 | dh = rel_codes[:, 3::4] / wh 75 | 76 | # Prevent sending too large values into torch.exp() 77 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 78 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 79 | 80 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 81 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 82 | pred_w = torch.exp(dw) * widths[:, None] 83 | pred_h = torch.exp(dh) * heights[:, None] 84 | 85 | pred_boxes = torch.zeros_like(rel_codes) 86 | # x1 87 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 88 | # y1 89 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 90 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 91 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 92 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 93 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 94 | 95 | return pred_boxes 96 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .detectors import build_detection_model 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/detectors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .generalized_rcnn import GeneralizedRCNN 3 | from .generalized_rrpn_rcnn import GeneralizedRRPNRCNN 4 | 5 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN, 6 | "RRPN":GeneralizedRRPNRCNN} 7 | 8 | 9 | def build_detection_model(cfg): 10 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 11 | return meta_arch(cfg) 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/generalized_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Implements the Generalized R-CNN framework 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | 9 | from maskrcnn_benchmark.structures.image_list import to_image_list 10 | 11 | from ..backbone import build_backbone 12 | from ..rpn.rpn import build_rpn 13 | from ..roi_heads.roi_heads import build_roi_heads 14 | 15 | 16 | class GeneralizedRCNN(nn.Module): 17 | """ 18 | Main class for Generalized R-CNN. Currently supports boxes and masks. 19 | It consists of three main parts: 20 | - backbone 21 | - rpn 22 | - heads: takes the features + the proposals from the RPN and computes 23 | detections / masks from it. 
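A minimal usage sketch (cfg is assumed to be a fully merged config node)::

    model = build_detection_model(cfg)   # from detectors.py above
    losses = model(images, targets)      # training mode: dict of losses
    model.eval()
    with torch.no_grad():
        detections = model(images)       # inference: list[BoxList]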
24 | """ 25 | 26 | def __init__(self, cfg): 27 | super(GeneralizedRCNN, self).__init__() 28 | 29 | self.backbone = build_backbone(cfg) 30 | self.rpn = build_rpn(cfg) 31 | self.roi_heads = build_roi_heads(cfg) 32 | 33 | def forward(self, images, targets=None): 34 | """ 35 | Arguments: 36 | images (list[Tensor] or ImageList): images to be processed 37 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 38 | 39 | Returns: 40 | result (list[BoxList] or dict[Tensor]): the output from the model. 41 | During training, it returns a dict[Tensor] which contains the losses. 42 | During testing, it returns list[BoxList] containing additional fields 43 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 44 | 45 | """ 46 | if self.training and targets is None: 47 | raise ValueError("In training mode, targets should be passed") 48 | images = to_image_list(images) 49 | features = self.backbone(images.tensors) 50 | proposals, proposal_losses = self.rpn(images, features, targets) 51 | if self.roi_heads: 52 | x, result, detector_losses = self.roi_heads(features, proposals, targets) 53 | else: 54 | # RPN-only models don't have roi_heads 55 | x = features 56 | result = proposals 57 | detector_losses = {} 58 | 59 | if self.training: 60 | losses = {} 61 | losses.update(detector_losses) 62 | losses.update(proposal_losses) 63 | return losses 64 | 65 | return result 66 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/generalized_rrpn_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Implements the Generalized R-CNN framework 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | 9 | from maskrcnn_benchmark.structures.image_list import to_image_list 10 | 11 | from ..backbone import build_backbone 12 | from ..rrpn.rrpn import build_rpn 13 | from ..roi_heads.rroi_heads import build_roi_heads 14 | 15 | 16 | class GeneralizedRRPNRCNN(nn.Module): 17 | """ 18 | Main class for Generalized R-CNN. Currently supports boxes and masks. 19 | It consists of three main parts: 20 | - backbone 21 | - rpn 22 | - heads: takes the features + the proposals from the RPN and computes 23 | detections / masks from it. 24 | """ 25 | 26 | def __init__(self, cfg): 27 | super(GeneralizedRRPNRCNN, self).__init__() 28 | 29 | self.backbone = build_backbone(cfg) 30 | self.rpn = build_rpn(cfg) 31 | self.roi_heads = build_roi_heads(cfg) 32 | self.fp4p_on = cfg.MODEL.FP4P_ON 33 | 34 | def forward(self, images, targets=None): 35 | """ 36 | Arguments: 37 | images (list[Tensor] or ImageList): images to be processed 38 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 39 | 40 | Returns: 41 | result (list[BoxList] or dict[Tensor]): the output from the model. 42 | During training, it returns a dict[Tensor] which contains the losses. 43 | During testing, it returns list[BoxList] containing additional fields 44 | like `scores`, `labels` and `mask` (for Mask R-CNN models).
45 | 46 | """ 47 | if self.training and targets is None: 48 | raise ValueError("In training mode, targets should be passed") 49 | images = to_image_list(images) 50 | features = self.backbone(images.tensors) 51 | 52 | if self.fp4p_on: 53 | # get you C4 54 | proposals, proposal_losses = self.rpn(images, (features[-1],), targets) 55 | else: 56 | proposals, proposal_losses = self.rpn(images, features, targets) 57 | 58 | # features = [feature.detach() for feature in features] 59 | 60 | if self.roi_heads: 61 | x, result, detector_losses = self.roi_heads(features, proposals, targets) 62 | else: 63 | # RPN-only models don't have roi_heads 64 | x = features 65 | result = proposals 66 | detector_losses = {} 67 | 68 | if self.training: 69 | losses = {} 70 | losses.update(detector_losses) 71 | losses.update(proposal_losses) 72 | return losses 73 | 74 | return result 75 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from maskrcnn_benchmark.utils.registry import Registry 4 | 5 | BACKBONES = Registry() 6 | ROI_BOX_FEATURE_EXTRACTORS = Registry() 7 | RPN_HEADS = Registry() 8 | RROI_BOX_FEATURE_EXTRACTORS = Registry() -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/roi_heads/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 6 | from .roi_box_predictors import make_roi_box_predictor 7 | from .inference import make_roi_box_post_processor 8 | from .loss import make_roi_box_loss_evaluator 9 | 10 | 11 | class ROIBoxHead(torch.nn.Module): 12 | """ 13 | Generic Box Head class. 14 | """ 15 | 16 | def __init__(self, cfg): 17 | super(ROIBoxHead, self).__init__() 18 | self.feature_extractor = make_roi_box_feature_extractor(cfg) 19 | self.predictor = make_roi_box_predictor(cfg) 20 | self.post_processor = make_roi_box_post_processor(cfg) 21 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 22 | 23 | def forward(self, features, proposals, targets=None): 24 | """ 25 | Arguments: 26 | features (list[Tensor]): feature-maps from possibly several levels 27 | proposals (list[BoxList]): proposal boxes 28 | targets (list[BoxList], optional): the ground-truth targets. 29 | 30 | Returns: 31 | x (Tensor): the result of the feature extractor 32 | proposals (list[BoxList]): during training, the subsampled proposals 33 | are returned. 
During testing, the predicted boxlists are returned 34 | losses (dict[Tensor]): During training, returns the losses for the 35 | head. During testing, returns an empty dict. 36 | """ 37 | 38 | if self.training: 39 | # Faster R-CNN subsamples during training the proposals with a fixed 40 | # positive / negative ratio 41 | with torch.no_grad(): 42 | proposals = self.loss_evaluator.subsample(proposals, targets) 43 | 44 | # extract features that will be fed to the final classifier. The 45 | # feature_extractor generally corresponds to the pooler + heads 46 | x = self.feature_extractor(features, proposals) 47 | # final classifier that converts the features into predictions 48 | class_logits, box_regression = self.predictor(x) 49 | 50 | if not self.training: 51 | result = self.post_processor((class_logits, box_regression), proposals) 52 | return x, result, {} 53 | 54 | loss_classifier, loss_box_reg = self.loss_evaluator( 55 | [class_logits], [box_regression] 56 | ) 57 | return ( 58 | x, 59 | proposals, 60 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 61 | ) 62 | 63 | 64 | def build_roi_box_head(cfg): 65 | """ 66 | Constructs a new box head. 67 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 68 | and make it a parameter in the config 69 | """ 70 | return ROIBoxHead(cfg) 71 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | 4 | 5 | class FastRCNNPredictor(nn.Module): 6 | def __init__(self, config, pretrained=None): 7 | super(FastRCNNPredictor, self).__init__() 8 | 9 | stage_index = 4 10 | stage2_relative_factor = 2 ** (stage_index - 1) 11 | res2_out_channels = config.MODEL.RESNETS.RES2_OUT_CHANNELS 12 | num_inputs = res2_out_channels * stage2_relative_factor 13 | 14 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7) 16 | self.cls_score = nn.Linear(num_inputs, num_classes) 17 | self.bbox_pred = nn.Linear(num_inputs, num_classes * 4) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | cls_logit = self.cls_score(x) 29 | bbox_pred = self.bbox_pred(x) 30 | return cls_logit, bbox_pred 31 | 32 | 33 | class FPNPredictor(nn.Module): 34 | def __init__(self, cfg): 35 | super(FPNPredictor, self).__init__() 36 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 37 | representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM 38 | 39 | self.cls_score = nn.Linear(representation_size, num_classes) 40 | self.bbox_pred = nn.Linear(representation_size, num_classes * 4) 41 | 42 | nn.init.normal_(self.cls_score.weight, std=0.01) 43 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 44 | for l in [self.cls_score, self.bbox_pred]: 45 | nn.init.constant_(l.bias, 0) 46 | 47 | def forward(self, x): 48 | scores = self.cls_score(x) 49 | bbox_deltas = self.bbox_pred(x) 50 | 51 | return scores, bbox_deltas 52 | 53 | 54 | _ROI_BOX_PREDICTOR = { 55 | "FastRCNNPredictor": FastRCNNPredictor, 56 | "FPNPredictor": FPNPredictor, 57 | } 58 | 59 | 60 | def 
make_roi_box_predictor(cfg): 61 | func = _ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 62 | return func(cfg) 63 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | 7 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 8 | from .roi_mask_predictors import make_roi_mask_predictor 9 | from .inference import make_roi_mask_post_processor 10 | from .loss import make_roi_mask_loss_evaluator 11 | 12 | 13 | def keep_only_positive_boxes(boxes): 14 | """ 15 | Given a set of BoxList containing the `labels` field, 16 | return a set of BoxList for which `labels > 0`. 17 | 18 | Arguments: 19 | boxes (list of BoxList) 20 | """ 21 | assert isinstance(boxes, (list, tuple)) 22 | assert isinstance(boxes[0], BoxList) 23 | assert boxes[0].has_field("labels") 24 | positive_boxes = [] 25 | positive_inds = [] 26 | num_boxes = 0 27 | for boxes_per_image in boxes: 28 | labels = boxes_per_image.get_field("labels") 29 | inds_mask = labels > 0 30 | inds = inds_mask.nonzero().squeeze(1) 31 | positive_boxes.append(boxes_per_image[inds]) 32 | positive_inds.append(inds_mask) 33 | return positive_boxes, positive_inds 34 | 35 | 36 | class ROIMaskHead(torch.nn.Module): 37 | def __init__(self, cfg): 38 | super(ROIMaskHead, self).__init__() 39 | self.cfg = cfg.clone() 40 | self.feature_extractor = make_roi_mask_feature_extractor(cfg) 41 | self.predictor = make_roi_mask_predictor(cfg) 42 | self.post_processor = make_roi_mask_post_processor(cfg) 43 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 44 | 45 | def forward(self, features, proposals, targets=None): 46 | """ 47 | Arguments: 48 | features (list[Tensor]): feature-maps from possibly several levels 49 | proposals (list[BoxList]): proposal boxes 50 | targets (list[BoxList], optional): the ground-truth targets. 51 | 52 | Returns: 53 | x (Tensor): the result of the feature extractor 54 | proposals (list[BoxList]): during training, the original proposals 55 | are returned. During testing, the predicted boxlists are returned 56 | with the `mask` field set 57 | losses (dict[Tensor]): During training, returns the losses for the 58 | head. During testing, returns an empty dict. 
59 | """ 60 | 61 | if self.training: 62 | # during training, only focus on positive boxes 63 | all_proposals = proposals 64 | proposals, positive_inds = keep_only_positive_boxes(proposals) 65 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 66 | x = features 67 | x = x[torch.cat(positive_inds, dim=0)] 68 | else: 69 | x = self.feature_extractor(features, proposals) 70 | mask_logits = self.predictor(x) 71 | 72 | if not self.training: 73 | result = self.post_processor(mask_logits, proposals) 74 | return x, result, {} 75 | 76 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 77 | 78 | return x, all_proposals, dict(loss_mask=loss_mask) 79 | 80 | 81 | def build_roi_mask_head(cfg): 82 | return ROIMaskHead(cfg) 83 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 6 | from maskrcnn_benchmark.modeling.poolers import Pooler 7 | from maskrcnn_benchmark.layers import Conv2d 8 | from maskrcnn_benchmark.modeling.make_layers import make_conv3x3 9 | 10 | 11 | class MaskRCNNFPNFeatureExtractor(nn.Module): 12 | """ 13 | Heads for FPN for classification 14 | """ 15 | 16 | def __init__(self, cfg): 17 | """ 18 | Arguments: 19 | num_classes (int): number of output classes 20 | input_size (int): number of channels of the input once it's flattened 21 | representation_size (int): size of the intermediate representation 22 | """ 23 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 24 | 25 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 26 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 27 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 28 | pooler = Pooler( 29 | output_size=(resolution, resolution), 30 | scales=scales, 31 | sampling_ratio=sampling_ratio, 32 | ) 33 | input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS 34 | self.pooler = pooler 35 | 36 | use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN 37 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 38 | dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION 39 | 40 | next_feature = input_size 41 | self.blocks = [] 42 | for layer_idx, layer_features in enumerate(layers, 1): 43 | layer_name = "mask_fcn{}".format(layer_idx) 44 | module = make_conv3x3(next_feature, layer_features, 45 | dilation=dilation, stride=1, use_gn=use_gn 46 | ) 47 | self.add_module(layer_name, module) 48 | next_feature = layer_features 49 | self.blocks.append(layer_name) 50 | 51 | def forward(self, x, proposals): 52 | x = self.pooler(x, proposals) 53 | 54 | for layer_name in self.blocks: 55 | x = F.relu(getattr(self, layer_name)(x)) 56 | 57 | return x 58 | 59 | 60 | _ROI_MASK_FEATURE_EXTRACTORS = { 61 | "ResNet50Conv5ROIFeatureExtractor": ResNet50Conv5ROIFeatureExtractor, 62 | "MaskRCNNFPNFeatureExtractor": MaskRCNNFPNFeatureExtractor, 63 | } 64 | 65 | 66 | def make_roi_mask_feature_extractor(cfg): 67 | func = _ROI_MASK_FEATURE_EXTRACTORS[cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR] 68 | return func(cfg) 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | from maskrcnn_benchmark.layers import ConvTranspose2d 7 | 8 | 9 | class MaskRCNNC4Predictor(nn.Module): 10 | def __init__(self, cfg): 11 | super(MaskRCNNC4Predictor, self).__init__() 12 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 13 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 14 | 15 | if cfg.MODEL.ROI_HEADS.USE_FPN: 16 | num_inputs = dim_reduced 17 | else: 18 | stage_index = 4 19 | stage2_relative_factor = 2 ** (stage_index - 1) 20 | res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 21 | num_inputs = res2_out_channels * stage2_relative_factor 22 | 23 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 24 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 25 | 26 | for name, param in self.named_parameters(): 27 | if "bias" in name: 28 | nn.init.constant_(param, 0) 29 | elif "weight" in name: 30 | # Caffe2 implementation uses MSRAFill, which in fact 31 | # corresponds to kaiming_normal_ in PyTorch 32 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 33 | 34 | def forward(self, x): 35 | x = F.relu(self.conv5_mask(x)) 36 | return self.mask_fcn_logits(x) 37 | 38 | 39 | _ROI_MASK_PREDICTOR = {"MaskRCNNC4Predictor": MaskRCNNC4Predictor} 40 | 41 | 42 | def make_roi_mask_predictor(cfg): 43 | func = _ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 44 | return func(cfg) 45 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rbox_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/roi_heads/rbox_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rbox_head/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 6 | from .roi_box_predictors import make_roi_box_predictor 7 | from .inference import make_roi_box_post_processor 8 | from .loss import make_roi_box_loss_evaluator 9 | 10 | 11 | class ROIBoxHead(torch.nn.Module): 12 | """ 13 | Generic Box Head class. 14 | """ 15 | 16 | def __init__(self, cfg): 17 | super(ROIBoxHead, self).__init__() 18 | self.feature_extractor = make_roi_box_feature_extractor(cfg) 19 | self.predictor = make_roi_box_predictor(cfg) 20 | self.post_processor = make_roi_box_post_processor(cfg) 21 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 22 | 23 | self.cfg = cfg 24 | 25 | def forward(self, features, proposals, targets=None): 26 | """ 27 | Arguments: 28 | features (list[Tensor]): feature-maps from possibly several levels 29 | proposals (list[BoxList]): proposal boxes 30 | targets (list[BoxList], optional): the ground-truth targets. 31 | 32 | Returns: 33 | x (Tensor): the result of the feature extractor 34 | proposals (list[BoxList]): during training, the subsampled proposals 35 | are returned. 
During testing, the predicted boxlists are returned 36 | losses (dict[Tensor]): During training, returns the losses for the 37 | head. During testing, returns an empty dict. 38 | """ 39 | 40 | # if self.cfg.TEST.CASCADE: 41 | recur_iter = self.cfg.MODEL.ROI_HEADS.RECUR_ITER if self.cfg.TEST.CASCADE else 1 42 | 43 | recur_proposals = proposals 44 | x = None 45 | for i in range(recur_iter): 46 | 47 | if self.training: 48 | # Faster R-CNN subsamples during training the proposals with a fixed 49 | # positive / negative ratio 50 | with torch.no_grad(): 51 | recur_proposals = self.loss_evaluator.subsample(recur_proposals, targets) 52 | 53 | # extract features that will be fed to the final classifier. The 54 | # feature_extractor generally corresponds to the pooler + heads 55 | x = self.feature_extractor(features, recur_proposals) 56 | # final classifier that converts the features into predictions 57 | class_logits, box_regression = self.predictor(x) 58 | 59 | if not self.training: 60 | recur_proposals = self.post_processor((class_logits, box_regression), recur_proposals, recur_iter - i - 1) # result 61 | else: 62 | loss_classifier, loss_box_reg = self.loss_evaluator( 63 | [class_logits], [box_regression] 64 | ) 65 | if not self.training: 66 | return x, recur_proposals, {} 67 | 68 | return ( 69 | x, 70 | recur_proposals, # the subsampled proposals carrying the "labels" field used by later heads 71 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 72 | ) 73 | 74 | 75 | def build_roi_box_head(cfg): 76 | """ 77 | Constructs a new box head. 78 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 79 | and make it a parameter in the config 80 | """ 81 | return ROIBoxHead(cfg) 82 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rbox_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
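# A shape sketch for the rotated-box predictors defined below; reading the
# 5 regression values per class as (dx, dy, dw, dh, dangle) is an assumption
# consistent with the 5-parameter encoding in rbox_coder.py, not something
# stated in this file.
#
#     # x: pooled features, e.g. (R, 1024, 7, 7) for the C4 head
#     # cls_logit:  (R, num_classes)
#     # bbox_pred:  (R, num_classes * 5); bbox_pred[:, 5*k : 5*k + 5] are
#     #             the regression deltas for class k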
2 | from torch import nn 3 | import torch 4 | 5 | class FastRCNNPredictor(nn.Module): 6 | def __init__(self, config, pretrained=None): 7 | super(FastRCNNPredictor, self).__init__() 8 | 9 | stage_index = 4 10 | stage2_relative_factor = 2 ** (stage_index - 1) 11 | res2_out_channels = config.MODEL.RESNETS.RES2_OUT_CHANNELS 12 | num_inputs = res2_out_channels * stage2_relative_factor 13 | 14 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7) 16 | self.cls_score = nn.Linear(num_inputs, num_classes) 17 | self.bbox_pred = nn.Linear(num_inputs, num_classes * 5) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | 29 | cls_logit = self.cls_score(x) 30 | bbox_pred = self.bbox_pred(x) 31 | 32 | return cls_logit, bbox_pred 33 | 34 | 35 | class FPNPredictor(nn.Module): 36 | def __init__(self, cfg): 37 | super(FPNPredictor, self).__init__() 38 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 39 | representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM 40 | 41 | self.cls_score = nn.Linear(representation_size, num_classes) 42 | self.bbox_pred = nn.Linear(representation_size, num_classes * 5) 43 | 44 | nn.init.normal_(self.cls_score.weight, std=0.01) 45 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 46 | for l in [self.cls_score, self.bbox_pred]: 47 | nn.init.constant_(l.bias, 0) 48 | 49 | def forward(self, x): 50 | scores = self.cls_score(x) 51 | bbox_deltas = self.bbox_pred(x) 52 | 53 | return scores, bbox_deltas 54 | 55 | 56 | _ROI_BOX_PREDICTOR = { 57 | "FastRCNNPredictor": FastRCNNPredictor, 58 | "FPNPredictor": FPNPredictor, 59 | } 60 | 61 | 62 | def make_roi_box_predictor(cfg): 63 | func = _ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 64 | return func(cfg) 65 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rec_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/roi_heads/rec_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rec_head/rec_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList, RBoxList 6 | 7 | from .roi_rec_feature_extractors import make_roi_rec_feature_extractor 8 | from .roi_rec_predictors import make_roi_rec_predictor 9 | from .inference import make_roi_rec_post_processor 10 | from .loss import make_roi_rec_loss_evaluator 11 | 12 | 13 | def keep_only_positive_boxes(boxes): 14 | """ 15 | Given a set of BoxList containing the `labels` field, 16 | return a set of BoxList for which `labels > 0`. 
17 | 18 | Arguments: 19 | boxes (list of BoxList) 20 | """ 21 | assert isinstance(boxes, (list, tuple)) 22 | assert isinstance(boxes[0], RBoxList) 23 | assert boxes[0].has_field("labels") 24 | positive_boxes = [] 25 | positive_inds = [] 26 | num_boxes = 0 27 | for boxes_per_image in boxes: 28 | labels = boxes_per_image.get_field("labels") 29 | inds_mask = labels > 0 30 | inds = inds_mask.nonzero().squeeze(1) 31 | positive_boxes.append(boxes_per_image[inds]) 32 | positive_inds.append(inds_mask) 33 | return positive_boxes, positive_inds 34 | 35 | 36 | class ROIRecHead(torch.nn.Module): 37 | def __init__(self, cfg): 38 | super(ROIRecHead, self).__init__() 39 | self.cfg = cfg.clone() 40 | self.feature_extractor = make_roi_rec_feature_extractor(cfg) 41 | self.predictor = make_roi_rec_predictor(cfg) 42 | self.post_processor = make_roi_rec_post_processor(cfg) 43 | self.loss_evaluator = make_roi_rec_loss_evaluator(cfg) 44 | 45 | def forward(self, features, proposals, targets=None): 46 | """ 47 | Arguments: 48 | features (list[Tensor]): feature-maps from possibly several levels 49 | proposals (list[BoxList]): proposal boxes 50 | targets (list[BoxList], optional): the ground-truth targets. 51 | 52 | Returns: 53 | x (Tensor): the result of the feature extractor 54 | proposals (list[BoxList]): during training, the original proposals 55 | are returned. During testing, the predicted boxlists are returned 56 | with the `mask` field set 57 | losses (dict[Tensor]): During training, returns the losses for the 58 | head. During testing, returns an empty dict. 59 | """ 60 | 61 | if self.training: 62 | # during training, only focus on positive boxes 63 | all_proposals = proposals 64 | proposals, positive_inds = keep_only_positive_boxes(proposals) 65 | if self.training and self.cfg.MODEL.ROI_REC_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 66 | x = features 67 | x = x[torch.cat(positive_inds, dim=0)] 68 | else: 69 | x = self.feature_extractor(features, proposals) 70 | rec_logits = self.predictor(x) 71 | 72 | if not self.training: 73 | result = self.post_processor(rec_logits, proposals) 74 | return x, result, {} 75 | 76 | loss_rec = self.loss_evaluator(proposals, rec_logits, targets) 77 | 78 | return x, all_proposals, dict(loss_rec=loss_rec) 79 | 80 | 81 | def build_roi_rec_head(cfg): 82 | return ROIRecHead(cfg) 83 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rmask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/modeling/roi_heads/rmask_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rmask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import RBoxList 6 | 7 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 8 | from .roi_mask_predictors import make_roi_mask_predictor 9 | from .inference import make_roi_mask_post_processor 10 | from .loss import make_roi_mask_loss_evaluator 11 | 12 | 13 | def keep_only_positive_boxes(boxes): 14 | """ 15 | Given a set of BoxList containing the `labels` field, 16 | return a set of BoxList for which `labels > 0`. 
17 | 18 | Arguments: 19 | boxes (list of BoxList) 20 | """ 21 | assert isinstance(boxes, (list, tuple)) 22 | assert isinstance(boxes[0], RBoxList) 23 | assert boxes[0].has_field("labels") 24 | positive_boxes = [] 25 | positive_inds = [] 26 | num_boxes = 0 27 | for boxes_per_image in boxes: 28 | labels = boxes_per_image.get_field("labels") 29 | inds_mask = labels > 0 30 | inds = inds_mask.nonzero().squeeze(1) 31 | positive_boxes.append(boxes_per_image[inds]) 32 | positive_inds.append(inds_mask) 33 | return positive_boxes, positive_inds 34 | 35 | 36 | class ROIMaskHead(torch.nn.Module): 37 | def __init__(self, cfg): 38 | super(ROIMaskHead, self).__init__() 39 | self.cfg = cfg.clone() 40 | self.feature_extractor = make_roi_mask_feature_extractor(cfg) 41 | self.predictor = make_roi_mask_predictor(cfg) 42 | self.post_processor = make_roi_mask_post_processor(cfg) 43 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 44 | 45 | def forward(self, features, proposals, targets=None): 46 | """ 47 | Arguments: 48 | features (list[Tensor]): feature-maps from possibly several levels 49 | proposals (list[BoxList]): proposal boxes 50 | targets (list[BoxList], optional): the ground-truth targets. 51 | 52 | Returns: 53 | x (Tensor): the result of the feature extractor 54 | proposals (list[BoxList]): during training, the original proposals 55 | are returned. During testing, the predicted boxlists are returned 56 | with the `mask` field set 57 | losses (dict[Tensor]): During training, returns the losses for the 58 | head. During testing, returns an empty dict. 59 | """ 60 | 61 | if self.training: 62 | # during training, only focus on positive boxes 63 | all_proposals = proposals 64 | proposals, positive_inds = keep_only_positive_boxes(proposals) 65 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 66 | x = features 67 | x = x[torch.cat(positive_inds, dim=0)] 68 | else: 69 | x = self.feature_extractor(features, proposals) 70 | mask_logits = self.predictor(x) 71 | 72 | if not self.training: 73 | result = self.post_processor(mask_logits, proposals) 74 | return x, result, {} 75 | 76 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 77 | 78 | return x, all_proposals, dict(loss_mask=loss_mask) 79 | 80 | 81 | def build_roi_mask_head(cfg): 82 | return ROIMaskHead(cfg) 83 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rmask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
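# A fully determined sketch of keep_only_positive_boxes (defined in
# mask_head.py above); the RBoxList itself is elided since its exact
# constructor lives in structures/bounding_box.py.
#
#     # boxes[0].get_field("labels") == tensor([0, 2, 0, 1])
#     # positive_boxes[0] -> the boxes at indices 1 and 3 (labels > 0)
#     # positive_inds[0]  -> tensor([0, 1, 0, 1], dtype=torch.uint8)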
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 6 | from maskrcnn_benchmark.modeling.poolers import Pooler, PyramidRROIAlign 7 | from maskrcnn_benchmark.layers import Conv2d 8 | from maskrcnn_benchmark.modeling.make_layers import make_conv3x3 9 | 10 | 11 | class MaskRCNNFPNFeatureExtractor(nn.Module): 12 | """ 13 | Heads for FPN for classification 14 | """ 15 | 16 | def __init__(self, cfg): 17 | """ 18 | Arguments: 19 | num_classes (int): number of output classes 20 | input_size (int): number of channels of the input once it's flattened 21 | representation_size (int): size of the intermediate representation 22 | """ 23 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 24 | 25 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 26 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 27 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 28 | pooler = PyramidRROIAlign( 29 | output_size=(resolution, resolution), 30 | scales=scales, 31 | ) 32 | input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS 33 | self.pooler = pooler 34 | 35 | use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN 36 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 37 | dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION 38 | 39 | self.word_margin = cfg.MODEL.ROI_REC_HEAD.BOXES_MARGIN 40 | self.det_margin = cfg.MODEL.RRPN.GT_BOX_MARGIN 41 | 42 | self.rescale = self.word_margin / self.det_margin 43 | 44 | next_feature = input_size 45 | self.blocks = [] 46 | for layer_idx, layer_features in enumerate(layers, 1): 47 | layer_name = "mask_fcn{}".format(layer_idx) 48 | module = make_conv3x3(next_feature, layer_features, 49 | dilation=dilation, stride=1, use_gn=use_gn 50 | ) 51 | self.add_module(layer_name, module) 52 | next_feature = layer_features 53 | self.blocks.append(layer_name) 54 | 55 | def forward(self, x, proposals): 56 | x = self.pooler(x, proposals) 57 | 58 | # resize_proposals = [proposal.rescale(self.rescale) for proposal in proposals] 59 | # x = self.pooler(x, resize_proposals) 60 | 61 | for layer_name in self.blocks: 62 | x = F.relu(getattr(self, layer_name)(x)) 63 | 64 | return x 65 | 66 | 67 | _ROI_MASK_FEATURE_EXTRACTORS = { 68 | "ResNet50Conv5ROIFeatureExtractor": ResNet50Conv5ROIFeatureExtractor, 69 | "MaskRCNNFPNFeatureExtractor": MaskRCNNFPNFeatureExtractor, 70 | } 71 | 72 | 73 | def make_roi_mask_feature_extractor(cfg): 74 | func = _ROI_MASK_FEATURE_EXTRACTORS[cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR] 75 | return func(cfg) 76 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rmask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
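# A worked example of the margin rescaling computed in the feature extractor
# above; both config values are hypothetical, not defaults read from this
# repository.
#
#     # MODEL.ROI_REC_HEAD.BOXES_MARGIN = 1.4  (word_margin)
#     # MODEL.RRPN.GT_BOX_MARGIN        = 1.2  (det_margin)
#     # rescale = 1.4 / 1.2 ~= 1.167, i.e. proposals would be enlarged by
#     # roughly 17% before pooling if the commented-out rescale path in
#     # forward() were enabled.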
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | from maskrcnn_benchmark.layers import ConvTranspose2d 7 | 8 | 9 | class MaskRCNNC4Predictor(nn.Module): 10 | def __init__(self, cfg): 11 | super(MaskRCNNC4Predictor, self).__init__() 12 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 13 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 14 | 15 | if cfg.MODEL.ROI_HEADS.USE_FPN: 16 | num_inputs = dim_reduced 17 | else: 18 | stage_index = 4 19 | stage2_relative_factor = 2 ** (stage_index - 1) 20 | res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 21 | num_inputs = res2_out_channels * stage2_relative_factor 22 | 23 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 24 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 25 | 26 | for name, param in self.named_parameters(): 27 | if "bias" in name: 28 | nn.init.constant_(param, 0) 29 | elif "weight" in name: 30 | # Caffe2 implementation uses MSRAFill, which in fact 31 | # corresponds to kaiming_normal_ in PyTorch 32 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 33 | 34 | def forward(self, x): 35 | x = F.relu(self.conv5_mask(x)) 36 | return self.mask_fcn_logits(x) 37 | 38 | 39 | _ROI_MASK_PREDICTOR = {"MaskRCNNC4Predictor": MaskRCNNC4Predictor} 40 | 41 | 42 | def make_roi_mask_predictor(cfg): 43 | func = _ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 44 | return func(cfg) 45 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/roi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .box_head.box_head import build_roi_box_head 5 | from .mask_head.mask_head import build_roi_mask_head 6 | 7 | 8 | class CombinedROIHeads(torch.nn.ModuleDict): 9 | """ 10 | Combines a set of individual heads (for box prediction or masks) into a single 11 | head. 
12 | """ 13 | 14 | def __init__(self, cfg, heads): 15 | super(CombinedROIHeads, self).__init__(heads) 16 | self.cfg = cfg.clone() 17 | if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 18 | self.mask.feature_extractor = self.box.feature_extractor 19 | 20 | def forward(self, features, proposals, targets=None): 21 | losses = {} 22 | # TODO rename x to roi_box_features, if it doesn't increase memory consumption 23 | x, detections, loss_box = self.box(features, proposals, targets) 24 | losses.update(loss_box) 25 | if self.cfg.MODEL.MASK_ON: 26 | mask_features = features 27 | # optimization: during training, if we share the feature extractor between 28 | # the box and the mask heads, then we can reuse the features already computed 29 | if ( 30 | self.training 31 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 32 | ): 33 | mask_features = x 34 | # During training, self.box() will return the unaltered proposals as "detections" 35 | # this makes the API consistent during training and testing 36 | x, detections, loss_mask = self.mask(mask_features, detections, targets) 37 | losses.update(loss_mask) 38 | return x, detections, losses 39 | 40 | 41 | def build_roi_heads(cfg): 42 | # individually create the heads, that will be combined together 43 | # afterwards 44 | roi_heads = [] 45 | if not cfg.MODEL.RPN_ONLY: 46 | roi_heads.append(("box", build_roi_box_head(cfg))) 47 | if cfg.MODEL.MASK_ON: 48 | roi_heads.append(("mask", build_roi_mask_head(cfg))) 49 | 50 | # combine individual heads in a single module 51 | if roi_heads: 52 | roi_heads = CombinedROIHeads(cfg, roi_heads) 53 | 54 | return roi_heads 55 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/rroi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .rbox_head.box_head import build_roi_box_head 5 | from .rec_head.rec_head import build_roi_rec_head 6 | from .rmask_head.mask_head import build_roi_mask_head 7 | 8 | class CombinedROIHeads(torch.nn.ModuleDict): 9 | """ 10 | Combines a set of individual heads (for box prediction or masks) into a single 11 | head. 
12 | """ 13 | 14 | def __init__(self, cfg, heads): 15 | super(CombinedROIHeads, self).__init__(heads) 16 | self.cfg = cfg.clone() 17 | if cfg.MODEL.REC_ON and cfg.MODEL.ROI_REC_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 18 | self.rec.feature_extractor = self.box.feature_extractor 19 | 20 | def forward(self, features, proposals, targets=None): 21 | losses = {} 22 | # TODO rename x to roi_box_features, if it doesn't increase memory consumption 23 | if self.cfg.MODEL.FP4P_ON: 24 | # get you C4 25 | x, detections, loss_box = self.box((features[-1], ), proposals, targets) 26 | else: 27 | x, detections, loss_box = self.box(features, proposals, targets) 28 | losses.update(loss_box) 29 | 30 | if self.cfg.MODEL.MASK_ON: 31 | mask_features = features 32 | # optimization: during training, if we share the feature extractor between 33 | # the box and the mask heads, then we can reuse the features already computed 34 | if ( 35 | self.training 36 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 37 | ): 38 | mask_features = x 39 | # During training, self.box() will return the unaltered proposals as "detections" 40 | # this makes the API consistent during training and testing 41 | # detach process 42 | mask_features_detach = [feature.detach() for feature in mask_features] 43 | x, detections, loss_mask = self.mask(mask_features_detach, detections, targets) 44 | losses.update(loss_mask) 45 | 46 | if self.cfg.MODEL.REC_ON: 47 | rec_features = features 48 | # optimization: during training, if we share the feature extractor between 49 | # the box and the mask heads, then we can reuse the features already computed 50 | if ( 51 | self.training 52 | and self.cfg.MODEL.ROI_REC_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 53 | ): 54 | rec_features = x 55 | # During training, self.box() will return the unaltered proposals as "detections" 56 | # this makes the API consistent during training and testing 57 | x, detections, loss_rec = self.rec(rec_features, detections, targets) 58 | losses.update(loss_rec) 59 | return x, detections, losses 60 | 61 | 62 | def build_roi_heads(cfg): 63 | # individually create the heads, that will be combined together 64 | # afterwards 65 | roi_heads = [] 66 | if not cfg.MODEL.RPN_ONLY: 67 | roi_heads.append(("box", build_roi_box_head(cfg))) 68 | if cfg.MODEL.REC_ON: 69 | roi_heads.append(("rec", build_roi_rec_head(cfg))) 70 | if cfg.MODEL.MASK_ON: 71 | roi_heads.append(("mask", build_roi_mask_head(cfg))) 72 | 73 | # combine individual heads in a single module 74 | if roi_heads: 75 | roi_heads = CombinedROIHeads(cfg, roi_heads) 76 | 77 | return roi_heads 78 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rrpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_optimizer 3 | from .build import make_lr_scheduler 4 | from .lr_scheduler import WarmupMultiStepLR 5 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .lr_scheduler import WarmupMultiStepLR 5 | 6 | 7 | def make_optimizer(cfg, model): 8 | params = [] 9 | for key, value in model.named_parameters(): 10 | if not value.requires_grad: 11 | continue 12 | lr = cfg.SOLVER.BASE_LR 13 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 14 | if "bias" in key: 15 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 16 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 17 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 18 | 19 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM) 20 | return optimizer 21 | 22 | 23 | def make_lr_scheduler(cfg, optimizer): 24 | return WarmupMultiStepLR( 25 | optimizer, 26 | cfg.SOLVER.STEPS, 27 | cfg.SOLVER.GAMMA, 28 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 29 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 30 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 31 | ) 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | 4 | import torch 5 | 6 | 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 8 | # separating MultiStepLR with WarmupLR 9 | # but the current LRScheduler design doesn't allow it 10 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer, 14 | milestones, 15 | gamma=0.1, 16 | warmup_factor=1.0 / 3, 17 | warmup_iters=500, 18 | warmup_method="linear", 19 | last_epoch=-1, 20 | ): 21 | if not list(milestones) == sorted(milestones): 22 | raise ValueError( 23 | "Milestones should be a list of" " increasing integers. 
Got {}", 24 | milestones, 25 | ) 26 | 27 | if warmup_method not in ("constant", "linear"): 28 | raise ValueError( 29 | "Only 'constant' or 'linear' warmup_method accepted" 30 | "got {}".format(warmup_method) 31 | ) 32 | self.milestones = milestones 33 | self.gamma = gamma 34 | self.warmup_factor = warmup_factor 35 | self.warmup_iters = warmup_iters 36 | self.warmup_method = warmup_method 37 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 38 | 39 | def get_lr(self): 40 | warmup_factor = 1 41 | if self.last_epoch < self.warmup_iters: 42 | if self.warmup_method == "constant": 43 | warmup_factor = self.warmup_factor 44 | elif self.warmup_method == "linear": 45 | alpha = float(self.last_epoch) / self.warmup_iters 46 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 47 | return [ 48 | base_lr 49 | * warmup_factor 50 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 51 | for base_lr in self.base_lrs 52 | ] 53 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/structures/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 
33 | When tensors is an iterable of Tensors, it pads 34 | the Tensors with zeros so that they have the same 35 | shape 36 | """ 37 | 38 | # print('tensors:', tensors.size(), 'isinstance(tensors, torch.Tensor):', isinstance(tensors, torch.Tensor)) 39 | 40 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 41 | tensors = [tensors] 42 | 43 | if isinstance(tensors, ImageList): 44 | return tensors 45 | elif isinstance(tensors, torch.Tensor): 46 | # single tensor shape can be inferred 47 | assert tensors.dim() == 4 48 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 49 | return ImageList(tensors, image_sizes) 50 | elif isinstance(tensors, (tuple, list)): 51 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 52 | 53 | # TODO Ideally, just remove this and let the model handle arbitrary 54 | # input sizes 55 | if size_divisible > 0: 56 | import math 57 | 58 | stride = size_divisible 59 | max_size = list(max_size) 60 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 61 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 62 | max_size = tuple(max_size) 63 | 64 | batch_shape = (len(tensors),) + max_size 65 | batched_imgs = tensors[0].new(*batch_shape).zero_() 66 | for img, pad_img in zip(tensors, batched_imgs): 67 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 68 | 69 | image_sizes = [im.shape[-2:] for im in tensors] 70 | 71 | return ImageList(batched_imgs, image_sizes) 72 | else: 73 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 74 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/maskrcnn_benchmark/utils/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
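A worked sketch of the `size_divisible` padding in `to_image_list` above: the padded batch shape is the per-dimension maximum, rounded up to a multiple of the stride. The stride value 32 is an assumed, typical FPN divisibility value, not read from any config.

```python
import math
import torch

tensors = [torch.randn(3, 37, 50), torch.randn(3, 41, 45)]
stride = 32  # assumed SIZE_DIVISIBILITY-style value

max_size = [max(s) for s in zip(*[img.shape for img in tensors])]  # [3, 41, 50]
max_size[1] = int(math.ceil(max_size[1] / stride) * stride)        # 41 -> 64
max_size[2] = int(math.ceil(max_size[2] / stride) * stride)        # 50 -> 64

batch_shape = (len(tensors),) + tuple(max_size)
batched_imgs = tensors[0].new(*batch_shape).zero_()                # zero padding
for img, pad_img in zip(tensors, batched_imgs):
    pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)

print(batch_shape)                        # (2, 3, 64, 64)
print([im.shape[-2:] for im in tensors])  # original sizes travel with the batch
```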
2 | import PIL 3 | 4 | from torch.utils.collect_env import get_pretty_env_info 5 | 6 | 7 | def get_pil_version(): 8 | return "\n Pillow ({})".format(PIL.__version__) 9 | 10 | 11 | def collect_env_info(): 12 | env_str = get_pretty_env_info() 13 | env_str += get_pil_version() 14 | return env_str 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for cv2 utility functions and maintaining version compatibility 3 | between 3.x and 4.x 4 | """ 5 | import cv2 6 | 7 | 8 | def findContours(*args, **kwargs): 9 | """ 10 | Wraps cv2.findContours to maintain compatibility between versions 11 | 3 and 4 12 | 13 | Returns: 14 | contours, hierarchy 15 | """ 16 | if cv2.__version__.startswith('4'): 17 | contours, hierarchy = cv2.findContours(*args, **kwargs) 18 | elif cv2.__version__.startswith('3'): 19 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 20 | else: 21 | raise AssertionError( 22 | 'cv2 must be either version 3 or 4 to call this method') 23 | 24 | return contours, hierarchy 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | 4 | from maskrcnn_benchmark.utils.imports import import_file 5 | 6 | 7 | def setup_environment(): 8 | """Perform environment setup work. The default setup is a no-op, but this 9 | function allows the user to specify a Python source file that performs 10 | custom setup work that may be necessary to their computing environment. 11 | """ 12 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 13 | if custom_module_path: 14 | setup_custom_environment(custom_module_path) 15 | else: 16 | # The default setup is a no-op 17 | pass 18 | 19 | 20 | def setup_custom_environment(custom_module_path): 21 | """Load custom environment setup from a Python source file and run the setup 22 | function. 23 | """ 24 | module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) 25 | assert hasattr(module, "setup_environment") and callable( 26 | module.setup_environment 27 | ), ( 28 | "Custom environment module defined in {} does not have the " 29 | "required callable attribute 'setup_environment'." 30 | ).format( 31 | custom_module_path 32 | ) 33 | module.setup_environment() 34 | 35 | 36 | # Force environment setup when this module is imported 37 | setup_environment() 38 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/imports.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
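A usage sketch for the `findContours` wrapper above, assuming OpenCV 3.x or 4.x is installed: the same call works under both versions because the wrapper hides the differing return arity.

```python
import cv2
import numpy as np
from maskrcnn_benchmark.utils import cv2_util

mask = np.zeros((32, 32), dtype=np.uint8)
mask[8:24, 8:24] = 1  # one square blob

contours, hierarchy = cv2_util.findContours(
    mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
)
print(len(contours))  # 1
```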
2 | import torch 3 | 4 | if torch._six.PY3: 5 | import importlib 6 | import importlib.util 7 | import sys 8 | 9 | 10 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 11 | def import_file(module_name, file_path, make_importable=False): 12 | spec = importlib.util.spec_from_file_location(module_name, file_path) 13 | module = importlib.util.module_from_spec(spec) 14 | spec.loader.exec_module(module) 15 | if make_importable: 16 | sys.modules[module_name] = module 17 | return module 18 | else: 19 | import imp 20 | 21 | def import_file(module_name, file_path, make_importable=None): 22 | module = imp.load_source(module_name, file_path) 23 | return module 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | 7 | def setup_logger(name, save_dir, distributed_rank): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | ch = logging.StreamHandler(stream=sys.stdout) 14 | ch.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 16 | ch.setFormatter(formatter) 17 | logger.addHandler(ch) 18 | 19 | if save_dir: 20 | fh = logging.FileHandler(os.path.join(save_dir, "log.txt")) 21 | fh.setLevel(logging.DEBUG) 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | 25 | return logger 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | 7 | 8 | class SmoothedValue(object): 9 | """Track a series of values and provide access to smoothed values over a 10 | window or the global series average. 
11 | """ 12 | 13 | def __init__(self, window_size=20): 14 | self.deque = deque(maxlen=window_size) 15 | self.series = [] 16 | self.total = 0.0 17 | self.count = 0 18 | 19 | def update(self, value): 20 | self.deque.append(value) 21 | self.series.append(value) 22 | self.count += 1 23 | self.total += value 24 | 25 | @property 26 | def median(self): 27 | d = torch.tensor(list(self.deque)) 28 | return d.median().item() 29 | 30 | @property 31 | def avg(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.mean().item() 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger(object): 41 | def __init__(self, delimiter="\t"): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | if attr in self.__dict__: 56 | return self.__dict__[attr] 57 | raise AttributeError("'{}' object has no attribute '{}'".format( 58 | type(self).__name__, attr)) 59 | 60 | def __str__(self): 61 | loss_str = [] 62 | for name, meter in self.meters.items(): 63 | loss_str.append( 64 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 65 | ) 66 | return self.delimiter.join(loss_str) 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import os 4 | 5 | 6 | def mkdir(path): 7 | try: 8 | os.makedirs(path) 9 | except OSError as e: 10 | if e.errno != errno.EEXIST: 11 | raise 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | import sys 4 | 5 | from torch.utils.model_zoo import _download_url_to_file 6 | from torch.utils.model_zoo import urlparse 7 | from torch.utils.model_zoo import HASH_REGEX 8 | 9 | from maskrcnn_benchmark.utils.comm import is_main_process 10 | from maskrcnn_benchmark.utils.comm import synchronize 11 | 12 | 13 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py 14 | # but with a few improvements and modifications 15 | def cache_url(url, model_dir=None, progress=True): 16 | r"""Loads the Torch serialized object at the given URL. 17 | If the object is already present in `model_dir`, it's deserialized and 18 | returned. The filename part of the URL should follow the naming convention 19 | ``filename-.ext`` where ```` is the first eight or more 20 | digits of the SHA256 hash of the contents of the file. The hash is used to 21 | ensure unique names and to verify the contents of the file. 22 | The default value of `model_dir` is ``$TORCH_HOME/models`` where 23 | ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be 24 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 
25 | Args: 26 | url (string): URL of the object to download 27 | model_dir (string, optional): directory in which to save the object 28 | progress (bool, optional): whether or not to display a progress bar to stderr 29 | Example: 30 | >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') 31 | """ 32 | if model_dir is None: 33 | torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch')) 34 | model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models')) 35 | if not os.path.exists(model_dir): 36 | os.makedirs(model_dir) 37 | parts = urlparse(url) 38 | filename = os.path.basename(parts.path) 39 | if filename == "model_final.pkl": 40 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename 41 | # so make the full path the filename by replacing / with _ 42 | filename = parts.path.replace("/", "_") 43 | cached_file = os.path.join(model_dir, filename) 44 | if not os.path.exists(cached_file) and is_main_process(): 45 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 46 | hash_prefix = HASH_REGEX.search(filename) 47 | if hash_prefix is not None: 48 | hash_prefix = hash_prefix.group(1) 49 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, 50 | # which matches the hash PyTorch uses. So we skip the hash matching 51 | # if the hash_prefix is less than 6 characters 52 | if len(hash_prefix) < 6: 53 | hash_prefix = None 54 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 55 | synchronize() 56 | return cached_file 57 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/rec_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import collections 4 | 5 | 6 | class Coder: 7 | 8 | def __init__(self, alphabet_file): 9 | # All char in one line 10 | self.alphabet = open(alphabet_file, 'r').readlines()[0].replace('\n', '') 11 | self.dictionary = {} 12 | self.label_to_char = {} 13 | 14 | cnt = 1 15 | for ch in self.alphabet: 16 | self.dictionary[ch] = cnt 17 | self.label_to_char[cnt] = ch 18 | cnt += 1 19 | 20 | def encode(self, word_str): 21 | 22 | labels = [] 23 | for ch in word_str: 24 | if ch in self.alphabet: 25 | labels.append(self.dictionary[ch]) 26 | 27 | return labels 28 | 29 | def decode(self, labels): 30 | 31 | dec_str = '' 32 | for label in labels: 33 | if label in self.label_to_char: 34 | dec_str += self.label_to_char[label] 35 | 36 | return dec_str 37 | 38 | class StrLabelConverter(object): 39 | 40 | def __init__(self, alphabet): 41 | self.alphabet = alphabet + '-' # for `-1` index 42 | 43 | self.dict = {} 44 | for i, char in enumerate(alphabet): 45 | # NOTE: 0 is reserved for 'blank' required by wrap_ctc 46 | self.dict[char] = i + 1 47 | print('------------------- alphabet -------------------') 48 | print('alphabet:', self.alphabet) 49 | print('------------------------------------------------') 50 | def encode(self, text, depth=0): 51 | """Support batch or single str.""" 52 | if isinstance(text, str): 53 | for char in text: 54 | if self.alphabet.find(char) == -1: 55 | print(char) 56 | text = [self.dict[char] for char in text] 57 | length = [len(text)] 58 | elif isinstance(text, collections.Iterable): 59 | length = [len(s) for s in text] 60 | text = ''.join(text) 61 | text, _ = self.encode(text) 62 | 63 | if depth: 64 | return text, len(text) 65 | #return 
(torch.IntTensor(text), torch.IntTensor(length)) 66 | return (text, length) 67 | 68 | def decode(self, t, length, raw=False): 69 | if length.numel() == 1: 70 | length = length[0] 71 | t = t[:length] 72 | if raw: 73 | return ''.join([self.alphabet[i - 1] for i in t]) 74 | else: 75 | char_list = [] 76 | for i in range(length): 77 | if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])): 78 | char_list.append(self.alphabet[t[i] - 1]) 79 | return ''.join(char_list) 80 | else: 81 | texts = [] 82 | index = 0 83 | for i in range(length.numel()): 84 | l = length[i] 85 | texts.append(self.decode( 86 | t[index:index + l], torch.IntTensor([l]), raw=raw)) 87 | index += l 88 | return texts 89 | 90 | 91 | if __name__ == '__main__': 92 | 93 | alpha_f = 'alpha.txt' 94 | coder = Coder(alpha_f) 95 | 96 | words = ['shits', 'bull', 'fXxk'] 97 | 98 | for w in words: 99 | code = coder.encode(w) 100 | print('code:', code) 101 | word = coder.decode(code) 102 | print('word:', word) 103 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | def _register_generic(module_dict, module_name, module): 5 | assert module_name not in module_dict 6 | module_dict[module_name] = module 7 | 8 | 9 | class Registry(dict): 10 | ''' 11 | A helper class for managing registering modules, it extends a dictionary 12 | and provides a register functions. 13 | 14 | Eg. creeting a registry: 15 | some_registry = Registry({"default": default_module}) 16 | 17 | There're two ways of registering new modules: 18 | 1): normal way is just calling register function: 19 | def foo(): 20 | ... 21 | some_registry.register("foo_module", foo) 22 | 2): used as decorator when declaring the module: 23 | @some_registry.register("foo_module") 24 | @some_registry.register("foo_modeul_nickname") 25 | def foo(): 26 | ... 
27 | 28 | Accessing a module is just like using a dictionary, e.g.: 29 | f = some_registry["foo_module"] 30 | ''' 31 | def __init__(self, *args, **kwargs): 32 | super(Registry, self).__init__(*args, **kwargs) 33 | 34 | def register(self, module_name, module=None): 35 | # used as function call 36 | if module is not None: 37 | _register_generic(self, module_name, module) 38 | return 39 | 40 | # used as decorator 41 | def register_fn(fn): 42 | _register_generic(self, module_name, fn) 43 | return fn 44 | 45 | return register_fn 46 | -------------------------------------------------------------------------------- /rotation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjq11302010044/RRPN_pytorch/a966f6f238c03498514742cde5cd98e51efb440c/rotation/__init__.py -------------------------------------------------------------------------------- /rotation/rbbox_overlaps.hpp: -------------------------------------------------------------------------------- 1 | void _overlaps(float* overlaps,const float* boxes,const float* query_boxes, int n, int k, int device_id); 2 | 3 | -------------------------------------------------------------------------------- /rotation/rbbox_overlaps.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | cdef extern from "rbbox_overlaps.hpp": 5 | void _overlaps(np.float32_t*, np.float32_t*, np.float32_t*, int, int, int) 6 | 7 | def rbbx_overlaps (np.ndarray[np.float32_t, ndim=2] boxes, np.ndarray[np.float32_t, ndim=2] query_boxes, np.int32_t device_id=0): 8 | cdef int N = boxes.shape[0] 9 | cdef int K = query_boxes.shape[0] 10 | cdef np.ndarray[np.float32_t, ndim=2] overlaps = np.zeros((N, K), dtype = np.float32) 11 | _overlaps(&overlaps[0, 0], &boxes[0, 0], &query_boxes[0, 0], N, K, device_id) 12 | return overlaps 13 | 14 | 15 | -------------------------------------------------------------------------------- /rotation/rotate_circle_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | def a(np.float x,np.float r): 5 | return 0.5*3.1415926535*r*r-x*np.sqrt(r*r-x*x) - r*r*np.arcsin(x/r) 6 | 7 | 8 | def rotate_cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 9 | cdef np.ndarray[np.float32_t, ndim=1] x_ctrs = dets[:, 0] 10 | cdef np.ndarray[np.float32_t, ndim=1] y_ctrs = dets[:, 1] 11 | cdef np.ndarray[np.float32_t, ndim=1] heights = dets[:, 2] 12 | cdef np.ndarray[np.float32_t, ndim=1] widths = dets[:, 3] 13 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 5] 14 | 15 | cdef np.ndarray[np.float32_t, ndim=1] rs = np.sqrt(heights**2+widths**2)/2.0 16 | cdef np.ndarray[np.float32_t, ndim=1] areas = 3.1415926535*rs*rs 17 | 18 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 19 | cdef int ndets = dets.shape[0] 20 | cdef np.ndarray[np.int_t, ndim=1] suppressed = np.zeros((ndets), dtype=np.int) 21 | 22 | # nominal indices 23 | cdef int _i, _j 24 | # sorted indices 25 | cdef int i, j 26 | # temp variables for box i's (the box currently under consideration) 27 | cdef np.float32_t ix_ctr, iy_ctr, ir, iarea 28 | # variables for computing overlap with box j (lower scoring box) 29 | cdef np.float32_t xx_ctr, yy_ctr,rr 30 | cdef np.float32_t inter, ovr 31 | 32 | cdef np.float32_t d,x1,x2,s,r1,r2 33 | keep = [] 34 | for _i in range(ndets): 35 | i = order[_i] 36 | if suppressed[i] == 1: 37 | continue 38 |
keep.append(i) 39 | ix_ctr = x_ctrs[i] 40 | iy_ctr = y_ctrs[i] 41 | ir = rs[i] 42 | iarea = areas[i] 43 | 44 | for _j in range(_i+1,ndets): 45 | j = order[_j] 46 | if suppressed[j] == 1: 47 | continue 48 | xx_ctr = x_ctrs[j] 49 | yy_ctr = y_ctrs[j] 50 | rr = rs[j] 51 | 52 | d = np.sqrt((ix_ctr-xx_ctr)**2+(iy_ctr-yy_ctr)**2) 53 | 54 | if ir<=rr: 55 | r1 = ir 56 | r2 = rr 57 | else: 58 | r1 = rr 59 | r2 = ir 60 | 61 | if d > 0.0: 62 | x1 = (d*d+r1*r1-r2*r2)/(2*d) 63 | x2 = (d*d+r2*r2-r1*r1)/(2*d) 64 | s = (r2*r2-r1*r1-d*d)/(2*d) 65 | #else: Avoid Warning 66 | # x1 = 0 67 | # x2 = 0 68 | # s = 0 69 | 70 | if d<=r2-r1: 71 | inter = 3.1415926535*r1*r1 72 | elif d>=r2+r1 or r2 == 0 or r1 == 0: 73 | inter = 0.0 74 | else: 75 | if d*d < r2*r2-r1*r1: 76 | inter = 3.1415926535*r1*r1 - a(s,r1) + a(x2,r2) 77 | else: 78 | inter = a(x1,r1) + a(x2,r2) 79 | 80 | ovr = inter*1.0/(iarea+areas[j]-inter) 81 | 82 | 83 | if ovr>=thresh: 84 | suppressed[j]=1 85 | return keep 86 | -------------------------------------------------------------------------------- /rotation/rotate_cpu_nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import time 4 | import math 5 | def rotate_cpu_nms(dets, threshold): 6 | ''' 7 | Parameters 8 | ---------------- 9 | dets: (N, 6) --- x_ctr, y_ctr, height, width, angle, score 10 | threshold: 0.7 or 0.5 IoU 11 | ---------------- 12 | Returns 13 | ---------------- 14 | keep: keep the remaining index of dets 15 | ''' 16 | keep = [] 17 | scores = dets[:, -1] 18 | 19 | tic = time.time() 20 | 21 | order = scores.argsort()[::-1] 22 | ndets = dets.shape[0] 23 | print("nms start") 24 | print(ndets) 25 | suppressed = np.zeros((ndets), dtype = np.int) 26 | 27 | 28 | 29 | for _i in range(ndets): 30 | i = order[_i] 31 | if suppressed[i] == 1: 32 | continue 33 | keep.append(i) 34 | r1 = ((dets[i,0],dets[i,1]),(dets[i,3],dets[i,2]),dets[i,4]) 35 | area_r1 = dets[i,2]*dets[i,3] 36 | for _j in range(_i+1,ndets): 37 | #tic = time.time() 38 | j = order[_j] 39 | if suppressed[j] == 1: 40 | continue 41 | r2 = ((dets[j,0],dets[j,1]),(dets[j,3],dets[j,2]),dets[j,4]) 42 | area_r2 = dets[j,2]*dets[j,3] 43 | ovr = 0.0 44 | #+++ 45 | #d = math.sqrt((dets[i,0] - dets[j,0])**2 + (dets[i,1] - dets[j,1])**2) 46 | #d1 = math.sqrt(dets[i,2]**2 + dets[i,3]**2) 47 | #d2 = math.sqrt(dets[j,2]**2 + dets[j,3]**2) 48 | #if d < d1+d2: 49 | #--- 50 | int_pts = cv2.rotatedRectangleIntersection(r1, r2)[1] 51 | if None != int_pts: 52 | order_pts = cv2.convexHull(int_pts, returnPoints = True) 53 | int_area = cv2.contourArea(order_pts) 54 | ovr = int_area*1.0 / (area_r1+area_r2-int_area) 55 | 56 | 57 | 58 | 59 | 60 | if ovr>=threshold: 61 | suppressed[j]=1 62 | #print t1 - tic, t2 - t1, t3 - t2 63 | #print 64 | print(time.time() - tic) 65 | print("nms done") 66 | return keep 67 | 68 | 69 | 70 | if __name__ == "__main__": 71 | 72 | boxes = np.array([ 73 | [50, 50, 100, 100, 0,0.99], 74 | [60, 60, 100, 100, 0,0.88],#keep 0.68 75 | [50, 50, 100, 100, 45.0,0.66],#discard 0.70 76 | [200, 200, 100, 100, 0,0.77],#keep 0.0 77 | 78 | ]) 79 | 80 | #boxes = np.tile(boxes, (4500 / 4, 1)) 81 | 82 | #for ind in range(4500): 83 | # boxes[ind, 5] = 0 84 | 85 | a = rotate_cpu_nms(boxes, 0.7) 86 | 87 | print(boxes[a]) 88 | -------------------------------------------------------------------------------- /rotation/rotate_cpython_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | import cv2 4 | # note: cv2 is a plain Python module and cannot be cimported 5 | def rotate_cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 6 | cdef np.ndarray[np.float32_t, ndim=1] x_ctrs = dets[:, 0] 7 | cdef np.ndarray[np.float32_t, ndim=1] y_ctrs = dets[:, 1] 8 | cdef np.ndarray[np.float32_t, ndim=1] heights = dets[:, 2] 9 | cdef np.ndarray[np.float32_t, ndim=1] widths = dets[:, 3] 10 | cdef np.ndarray[np.float32_t, ndim=1] angles = dets[:, 4] 11 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 5] 12 | cdef
np.ndarray[np.float32_t, ndim=1] areas = heights * widths 13 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 14 | cdef int ndets = dets.shape[0] 15 | cdef np.ndarray[np.int_t, ndim=1] suppressed = np.zeros((ndets), dtype=np.int) 16 | # nominal indices 17 | cdef int _i, _j 18 | # sorted indices 19 | cdef int i, j 20 | # temp variables for box i's (the box currently under consideration) 21 | cdef np.float32_t ix_ctr, iy_ctr, ih, iw, ia, iarea 22 | # variables for computing overlap with box j (lower scoring box) 23 | cdef np.float32_t xx_ctr, yy_ctr, hh, ww, aa 24 | cdef np.float32_t inter, ovr 25 | keep = [] 26 | for _i in range(ndets): 27 | i = order[_i] 28 | if suppressed[i] == 1: 29 | continue 30 | keep.append(i) 31 | ix_ctr = x_ctrs[i] 32 | iy_ctr = y_ctrs[i] 33 | ih = heights[i] 34 | iw = widths[i] 35 | ia = angles[i] 36 | iarea = areas[i] 37 | r1 = ((ix_ctr, iy_ctr), (ih, iw), ia) 38 | for _j in range(_i+1,ndets): 39 | j = order[_j] 40 | if suppressed[j] == 1: 41 | continue 42 | xx_ctr = x_ctrs[j] 43 | yy_ctr = y_ctrs[j] 44 | hh = heights[j] 45 | ww = widths[j] 46 | aa = angles[j] 47 | r2 = ((xx_ctr,yy_ctr),(ww,hh),aa) 48 | ovr = 0.0 49 | int_pts = cv2.rotatedRectangleIntersection(r1, r2)[1] 50 | if None != int_pts: 51 | order_pts = cv2.convexHull(int_pts, returnPoints = True) 52 | inter = cv2.contourArea(order_pts) 53 | ovr = inter*1.0 / (iarea+areas[j]-inter) 54 | if ovr>=thresh: 55 | suppressed[j]=1 56 | return keep 57 | -------------------------------------------------------------------------------- /rotation/rotate_cython_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | import cv2 4 | def rotate_cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 5 | cdef np.ndarray[np.float32_t, ndim=1] x_ctrs = dets[:, 0] 6 | cdef np.ndarray[np.float32_t, ndim=1] y_ctrs = dets[:, 1] 7 | cdef np.ndarray[np.float32_t, ndim=1] heights = dets[:, 2] 8 | cdef np.ndarray[np.float32_t, ndim=1] widths = dets[:, 3] 9 | cdef np.ndarray[np.float32_t, ndim=1] angles = dets[:, 4] 10 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 5] 11 | cdef np.ndarray[np.float32_t, ndim=1] areas = heights * widths 12 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 13 | cdef int ndets = dets.shape[0] 14 | cdef np.ndarray[np.int_t, ndim=1] suppressed = np.zeros((ndets), dtype=np.int) 15 | # nominal indices 16 | cdef int _i, _j 17 | # sorted indices 18 | cdef int i, j 19 | # temp variables for box i's (the box currently under consideration) 20 | cdef np.float32_t ix_ctr, iy_ctr, ih, iw, ia, iarea 21 | # variables for computing overlap with box j (lower scoring box) 22 | cdef np.float32_t xx_ctr, yy_ctr, hh, ww, aa 23 | cdef np.float32_t inter, ovr 24 | keep = [] 25 | for _i in range(ndets): 26 | i = order[_i] 27 | if suppressed[i] == 1: 28 | continue 29 | keep.append(i) 30 | ix_ctr = x_ctrs[i] 31 | iy_ctr = y_ctrs[i] 32 | ih = heights[i] 33 | iw = widths[i] 34 | ia = angles[i] 35 | iarea = areas[i] 36 | r1 = ((ix_ctr, iy_ctr), (ih, iw), ia) 37 | for _j in range(_i+1,ndets): 38 | j = order[_j] 39 | if suppressed[j] == 1: 40 | continue 41 | xx_ctr = x_ctrs[j] 42 | yy_ctr = y_ctrs[j] 43 | hh = heights[j] 44 | ww = widths[j] 45 | aa = angles[j] 46 | r2 = ((xx_ctr,yy_ctr),(ww,hh),aa) 47 | ovr = 0.0 48 | int_pts = cv2.rotatedRectangleIntersection(r1, r2)[1] 49 | if None != int_pts: 50 | order_pts = cv2.convexHull(int_pts, returnPoints = True) 51 | inter = cv2.contourArea(order_pts)
52 | ovr = inter*1.0 / (iarea+areas[j]-inter) 53 | if ovr>=thresh: 54 | suppressed[j]=1 55 | return keep 56 | -------------------------------------------------------------------------------- /rotation/rotate_gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _rotate_nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /rotation/rotate_gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | assert sizeof(int) == sizeof(np.int32_t) 5 | 6 | cdef extern from "rotate_gpu_nms.hpp": 7 | void _rotate_nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 8 | 9 | def rotate_gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 10 | np.int32_t device_id=0): 11 | cdef int boxes_num = dets.shape[0] 12 | cdef int boxes_dim = dets.shape[1] 13 | cdef int num_out 14 | cdef np.ndarray[np.int32_t, ndim=1] \ 15 | keep = np.zeros(boxes_num, dtype=np.int32) 16 | cdef np.ndarray[np.float32_t, ndim=1] \ 17 | scores = dets[:, 5] 18 | cdef np.ndarray[np.int_t, ndim=1] \ 19 | order = scores.argsort()[::-1] 20 | cdef np.ndarray[np.float32_t, ndim=2] \ 21 | sorted_dets = dets[order, :] 22 | _rotate_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 23 | keep = keep[:num_out] 24 | return list(order[keep]) 25 | -------------------------------------------------------------------------------- /rotation/rotate_polygon_nms.hpp: -------------------------------------------------------------------------------- 1 | void _rotate_nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /rotation/rotate_polygon_nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | assert sizeof(int) == sizeof(np.int32_t) 5 | 6 | cdef extern from "rotate_gpu_nms.hpp": 7 | void _rotate_nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 8 | 9 | def rotate_gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 10 | np.int32_t device_id=0): 11 | cdef int boxes_num = dets.shape[0] 12 | cdef int boxes_dim = dets.shape[1] 13 | cdef int num_out 14 | cdef np.ndarray[np.int32_t, ndim=1] \ 15 | keep = np.zeros(boxes_num, dtype=np.int32) 16 | cdef np.ndarray[np.float32_t, ndim=1] \ 17 | scores = dets[:, 5] 18 | cdef np.ndarray[np.int_t, ndim=1] \ 19 | order = scores.argsort()[::-1] 20 | cdef np.ndarray[np.float32_t, ndim=2] \ 21 | sorted_dets = dets[order, :] 22 | _rotate_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 23 | keep = keep[:num_out] 24 | return list(order[keep]) 25 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
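A usage sketch for the rotated NMS wrappers above, assuming the Cython extensions have been built (see `rotation_setup.py`). Rows of `dets` follow the `(x_ctr, y_ctr, height, width, angle, score)` layout documented in `rotate_cpu_nms.py`; the import path assumes the compiled module keeps the `rotation.rotate_gpu_nms` name.

```python
import numpy as np
from rotation.rotate_gpu_nms import rotate_gpu_nms  # compiled extension assumed

dets = np.array([
    [50,  50,  100, 100, 0.0, 0.99],
    [60,  60,  100, 100, 0.0, 0.88],   # heavy overlap with the first box
    [200, 200, 100, 100, 0.0, 0.77],   # disjoint, should survive
], dtype=np.float32)

keep = rotate_gpu_nms(dets, 0.7, device_id=0)
print(keep)  # indices of surviving boxes, highest-scoring first
```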
2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if torch.cuda.is_available() and CUDA_HOME is not None: 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "maskrcnn_benchmark._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="maskrcnn_benchmark", 61 | version="0.1", 62 | author="fmassa", 63 | url="https://github.com/facebookresearch/maskrcnn-benchmark", 64 | description="object detection in pytorch", 65 | packages=find_packages(exclude=("configs", "tests",)), 66 | # install_requires=requirements, 67 | ext_modules=get_extensions(), 68 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 69 | ) 70 | -------------------------------------------------------------------------------- /tests/test_metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
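A small sanity-check sketch for the build configured in `setup.py` above; it assumes `python setup.py build_ext install` (or `develop`) has already been run, and only verifies that the compiled `maskrcnn_benchmark._C` extension is importable.

```python
import torch
from torch.utils.cpp_extension import CUDA_HOME

from maskrcnn_benchmark import _C  # ImportError here means build_ext never ran

print("extension loaded from:", _C.__file__)
# setup.py switches to CUDAExtension only when both of these held at build time:
print("CUDA build expected:", torch.cuda.is_available() and CUDA_HOME is not None)
```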
2 | import unittest 3 | 4 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 5 | 6 | 7 | class TestMetricLogger(unittest.TestCase): 8 | def test_update(self): 9 | meter = MetricLogger() 10 | for i in range(10): 11 | meter.update(metric=float(i)) 12 | 13 | m = meter.meters["metric"] 14 | self.assertEqual(m.count, 10) 15 | self.assertEqual(m.total, 45) 16 | self.assertEqual(m.median, 4) 17 | self.assertEqual(m.avg, 4.5) 18 | 19 | def test_no_attr(self): 20 | meter = MetricLogger() 21 | _ = meter.meters 22 | _ = meter.delimiter 23 | def broken(): 24 | _ = meter.not_existent 25 | self.assertRaises(AttributeError, broken) 26 | 27 | if __name__ == "__main__": 28 | unittest.main() 29 | -------------------------------------------------------------------------------- /tools/cityscapes/instances2dict_with_polygons.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Convert instances from png files to a dictionary 4 | # This files is created according to https://github.com/facebookresearch/Detectron/issues/111 5 | 6 | from __future__ import print_function, absolute_import, division 7 | import os, sys 8 | 9 | sys.path.append( os.path.normpath( os.path.join( os.path.dirname( __file__ ) , '..' , 'helpers' ) ) ) 10 | from csHelpers import * 11 | 12 | # Cityscapes imports 13 | from cityscapesscripts.evaluation.instance import * 14 | from cityscapesscripts.helpers.csHelpers import * 15 | import cv2 16 | from maskrcnn_benchmark.utils import cv2_util 17 | 18 | 19 | def instances2dict_with_polygons(imageFileList, verbose=False): 20 | imgCount = 0 21 | instanceDict = {} 22 | 23 | if not isinstance(imageFileList, list): 24 | imageFileList = [imageFileList] 25 | 26 | if verbose: 27 | print("Processing {} images...".format(len(imageFileList))) 28 | 29 | for imageFileName in imageFileList: 30 | # Load image 31 | img = Image.open(imageFileName) 32 | 33 | # Image as numpy array 34 | imgNp = np.array(img) 35 | 36 | # Initialize label categories 37 | instances = {} 38 | for label in labels: 39 | instances[label.name] = [] 40 | 41 | # Loop through all instance ids in instance image 42 | for instanceId in np.unique(imgNp): 43 | if instanceId < 1000: 44 | continue 45 | instanceObj = Instance(imgNp, instanceId) 46 | instanceObj_dict = instanceObj.toDict() 47 | 48 | #instances[id2label[instanceObj.labelID].name].append(instanceObj.toDict()) 49 | if id2label[instanceObj.labelID].hasInstances: 50 | mask = (imgNp == instanceId).astype(np.uint8) 51 | contour, hier = cv2_util.findContours( 52 | mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 53 | 54 | polygons = [c.reshape(-1).tolist() for c in contour] 55 | instanceObj_dict['contours'] = polygons 56 | 57 | instances[id2label[instanceObj.labelID].name].append(instanceObj_dict) 58 | 59 | imgKey = os.path.abspath(imageFileName) 60 | instanceDict[imgKey] = instances 61 | imgCount += 1 62 | 63 | if verbose: 64 | print("\rImages Processed: {}".format(imgCount), end=' ') 65 | sys.stdout.flush() 66 | 67 | if verbose: 68 | print("") 69 | 70 | return instanceDict 71 | 72 | def main(argv): 73 | fileList = [] 74 | if (len(argv) > 2): 75 | for arg in argv: 76 | if ("png" in arg): 77 | fileList.append(arg) 78 | instances2dict_with_polygons(fileList, True) 79 | 80 | if __name__ == "__main__": 81 | main(sys.argv[1:]) 82 | --------------------------------------------------------------------------------
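Finally, a minimal sketch of the contour-to-polygon flattening used by `instances2dict_with_polygons` above: each OpenCV contour of shape `(N, 1, 2)` becomes a flat `[x1, y1, x2, y2, ...]` list, which is the format COCO-style annotations expect.

```python
import numpy as np

contour = np.array([[[8, 8]], [[8, 23]], [[23, 23]], [[23, 8]]])  # shape (4, 1, 2)
polygon = contour.reshape(-1).tolist()
print(polygon)  # [8, 8, 8, 23, 23, 23, 23, 8]
```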