├── .flake8
├── .github
│   └── ISSUE_TEMPLATE
│       ├── bug-report.md
│       ├── feature-request.md
│       └── questions-help-support.md
├── .gitignore
├── ABSTRACTIONS.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── INSTALL.md
├── LICENSE
├── MODEL_ZOO.md
├── OLD_README.md
├── README.md
├── TROUBLESHOOTING.md
├── arch.png
├── configs
│   ├── caffe2
│   │   ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml
│   │   ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml
│   │   ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml
│   │   ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
│   │   ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml
│   │   ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml
│   │   ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml
│   │   └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
│   ├── e2e_faster_rcnn_R_101_FPN_1x.yaml
│   ├── e2e_faster_rcnn_R_50_C4_1x.yaml
│   ├── e2e_faster_rcnn_R_50_FPN_1x.yaml
│   ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml
│   ├── e2e_mask_rcnn_R_101_FPN_1x.yaml
│   ├── e2e_mask_rcnn_R_50_C4_1x.yaml
│   ├── e2e_mask_rcnn_R_50_FPN_1x.yaml
│   ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml
│   ├── quick_schedules
│   │   ├── e2e_faster_rcnn_R_50_C4_quick.yaml
│   │   ├── e2e_faster_rcnn_R_50_FPN_quick.yaml
│   │   ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml
│   │   ├── e2e_mask_rcnn_R_50_C4_quick.yaml
│   │   ├── e2e_mask_rcnn_R_50_FPN_quick.yaml
│   │   ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml
│   │   ├── rpn_R_50_C4_quick.yaml
│   │   └── rpn_R_50_FPN_quick.yaml
│   └── retina
│       ├── retinanet_R-101-FPN_1x.yaml
│       ├── retinanet_R-50-FPN_1x.yaml
│       ├── retinanet_R-50-FPN_1x_adjust_std011.yaml
│       ├── retinanet_R-50-FPN_1x_adjust_std100.yaml
│       ├── retinanet_R-50-FPN_1x_adjustl1.yaml
│       ├── retinanet_R-50-FPN_1x_beta100.yaml
│       ├── retinanet_R-50-FPN_1x_low_quality_0.2.yaml
│       ├── retinanet_R-50-FPN_1x_low_quality_0.3.yaml
│       ├── retinanet_R-50-FPN_1x_low_quality_0.4.yaml
│       ├── retinanet_R-50-FPN_1x_no_low_quality.yaml
│       ├── retinanet_R-50-FPN_1x_no_low_quality_adjustl1.yaml
│       ├── retinanet_mask_R-101-FPN_1.5x_adjust_std011_400.yaml
│       ├── retinanet_mask_R-101-FPN_1.5x_adjust_std011_500.yaml
│       ├── retinanet_mask_R-101-FPN_1.5x_adjust_std011_600.yaml
│       ├── retinanet_mask_R-101-FPN_1.5x_adjust_std011_700.yaml
│       ├── retinanet_mask_R-101-FPN_1.5x_adjust_std011_800.yaml
│       ├── retinanet_mask_R-101-FPN_2x_adjust_std011_ms.yaml
│       ├── retinanet_mask_R-101-FPN_2x_adjust_std011_ms_gn.yaml
│       ├── retinanet_mask_R-50-FPN_1.5x.yaml
│       ├── retinanet_mask_R-50-FPN_1.5x_adjust_std011_400.yaml
│       ├── retinanet_mask_R-50-FPN_1.5x_adjust_std011_500.yaml
│       ├── retinanet_mask_R-50-FPN_1.5x_adjust_std011_600.yaml
│       ├── retinanet_mask_R-50-FPN_1.5x_adjust_std011_800.yaml
│       ├── retinanet_mask_R-50-FPN_1x.yaml
│       ├── retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml
│       ├── retinanet_mask_R-50-FPN_canonical5_1.5x.yaml
│       ├── retinanet_mask_R-50-FPN_canonical5_1x.yaml
│       ├── retinanet_mask_X-101-32x8d_FPN_2x_adjust_std011_ms_gn.yaml
│       └── retinanet_mask_p2p7_R-50-FPN_1x.yaml
├── demo
│   ├── Mask_R-CNN_demo.ipynb
│   ├── README.md
│   ├── demo_e2e_mask_rcnn_R_50_FPN_1x.png
│   ├── demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png
│   ├── predictor.py
│   └── webcam.py
├── maskrcnn_benchmark
│   ├── __init__.py
│   ├── config
│   │   ├── __init__.py
│   │   ├── defaults.py
│   │   └── paths_catalog.py
│   ├── csrc
│   │   ├── ROIAlign.h
│   │   ├── ROIPool.h
│   │   ├── SigmoidFocalLoss.h
│   │   ├── cpu
│   │   │   ├── ROIAlign_cpu.cpp
│   │   │   ├── nms_cpu.cpp
│   │   │   └── vision.h
│   │   ├── cuda
│   │   │   ├── ROIAlign_cuda.cu
│   │   │   ├── ROIPool_cuda.cu
│   │   │   ├── SigmoidFocalLoss_cuda.cu
│   │   │   ├── nms.cu
│   │   │   └── vision.h
│   │   ├── nms.h
│   │   └── vision.cpp
│   ├── data
│   │   ├── __init__.py
│   │   ├── build.py
│   │   ├── collate_batch.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── coco.py
│   │   │   ├── concat_dataset.py
│   │   │   └── list_dataset.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   ├── distributed.py
│   │   │   ├── grouped_batch_sampler.py
│   │   │   └── iteration_based_batch_sampler.py
│   │   └── transforms
│   │       ├── __init__.py
│   │       ├── build.py
│   │       └── transforms.py
│   ├── engine
│   │   ├── __init__.py
│   │   ├── inference.py
│   │   └── trainer.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── _utils.py
│   │   ├── adjust_smooth_l1_loss.py
│   │   ├── batch_norm.py
│   │   ├── misc.py
│   │   ├── nms.py
│   │   ├── roi_align.py
│   │   ├── roi_pool.py
│   │   ├── sigmoid_focal_loss.py
│   │   └── smooth_l1_loss.py
│   ├── modeling
│   │   ├── __init__.py
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── backbone.py
│   │   │   ├── fpn.py
│   │   │   └── resnet.py
│   │   ├── balanced_positive_negative_sampler.py
│   │   ├── box_coder.py
│   │   ├── detector
│   │   │   ├── __init__.py
│   │   │   ├── detectors.py
│   │   │   ├── generalized_rcnn.py
│   │   │   └── retinanet.py
│   │   ├── matcher.py
│   │   ├── poolers.py
│   │   ├── roi_heads
│   │   │   ├── __init__.py
│   │   │   ├── box_head
│   │   │   │   ├── __init__.py
│   │   │   │   ├── box_head.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── loss.py
│   │   │   │   ├── roi_box_feature_extractors.py
│   │   │   │   └── roi_box_predictors.py
│   │   │   ├── mask_head
│   │   │   │   ├── __init__.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── loss.py
│   │   │   │   ├── mask_head.py
│   │   │   │   ├── roi_mask_feature_extractors.py
│   │   │   │   └── roi_mask_predictors.py
│   │   │   └── roi_heads.py
│   │   ├── rpn
│   │   │   ├── __init__.py
│   │   │   ├── anchor_generator.py
│   │   │   ├── inference.py
│   │   │   ├── loss.py
│   │   │   ├── retinanet.py
│   │   │   ├── retinanet_detail_infer.py
│   │   │   ├── retinanet_infer.py
│   │   │   ├── retinanet_loss.py
│   │   │   └── rpn.py
│   │   └── utils.py
│   ├── solver
│   │   ├── __init__.py
│   │   ├── build.py
│   │   └── lr_scheduler.py
│   ├── structures
│   │   ├── __init__.py
│   │   ├── bounding_box.py
│   │   ├── boxlist_ops.py
│   │   ├── image_list.py
│   │   └── segmentation_mask.py
│   └── utils
│       ├── README.md
│       ├── __init__.py
│       ├── c2_model_loading.py
│       ├── checkpoint.py
│       ├── collect_env.py
│       ├── comm.py
│       ├── env.py
│       ├── imports.py
│       ├── logger.py
│       ├── metric_logger.py
│       ├── miscellaneous.py
│       ├── model_serialization.py
│       └── model_zoo.py
├── run_test_R-50.sh
├── run_test_R-50_dist.sh
├── setup.py
├── tests
│   ├── checkpoint.py
│   └── test_data_samplers.py
└── tools
    ├── parse_log.py
    ├── test_net.py
    └── train_net.py

/.flake8:
--------------------------------------------------------------------------------
# This is an example .flake8 config, used when developing *Black* itself.
# Keep in sync with setup.cfg which is used for source packages.

[flake8]
ignore = E203, E266, E501, W503
max-line-length = 80
max-complexity = 18
select = B,C,E,F,W,T4,B9
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.md:
--------------------------------------------------------------------------------
---
name: "\U0001F41B Bug Report"
about: Submit a bug report to help us improve Mask R-CNN Benchmark

---

## 🐛 Bug


## To Reproduce

Steps to reproduce the behavior:

1.
1.
1.


## Expected behavior


## Environment

Please copy and paste the output from the
[environment collection script from PyTorch](https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py)
(or fill out the checklist below manually).

You can get the script and run it with:
```
wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py
# For security purposes, please check the contents of collect_env.py before running it.
python collect_env.py
```

- PyTorch Version (e.g., 1.0):
- OS (e.g., Linux):
- How you installed PyTorch (`conda`, `pip`, source):
- Build command you used (if compiling from source):
- Python version:
- CUDA/cuDNN version:
- GPU models and configuration:
- Any other relevant information:

## Additional context

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
---
name: "\U0001F680Feature Request"
about: Submit a proposal/request for a new Mask R-CNN Benchmark feature

---

## 🚀 Feature


## Motivation


## Pitch


## Alternatives


## Additional context

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/questions-help-support.md:
--------------------------------------------------------------------------------
---
name: "❓Questions/Help/Support"
about: Do you need support?

---

## ❓ Questions and Help
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# compilation and distribution
__pycache__
_ext
*.pyc
*.so
maskrcnn_benchmark.egg-info/
build/
dist/

# pytorch/python/numpy formats
*.pth
*.pkl
*.npy

# ipython/jupyter notebooks
*.ipynb
**/.ipynb_checkpoints/

# Editor temporaries
*.swn
*.swo
*.swp
*~

# Pycharm editor settings
.idea

# project dirs
/datasets
/models
--------------------------------------------------------------------------------
/ABSTRACTIONS.md:
--------------------------------------------------------------------------------
## Abstractions
The main abstractions introduced by `maskrcnn_benchmark` that are useful to
have in mind are the following:

### ImageList
In PyTorch, the first dimension of the input to the network generally represents
the batch dimension, and thus all elements of the same batch have the same
height / width.
In order to support images with different sizes and aspect ratios in the same
batch, we created the `ImageList` class, which internally holds a batch of
images (of possibly different sizes). The images are padded with zeros such that
they have the same final size and are batched over the first dimension. The original
sizes of the images before padding are stored in the `image_sizes` attribute,
and the batched tensor in `tensors`.
We provide a convenience function `to_image_list` that accepts a few different
input types, including a list of tensors, and returns an `ImageList` object.

```python
import torch

from maskrcnn_benchmark.structures.image_list import to_image_list

images = [torch.rand(3, 100, 200), torch.rand(3, 150, 170)]
batched_images = to_image_list(images)

# it is also possible to make the final batched image be a multiple of a number
batched_images_32 = to_image_list(images, size_divisible=32)
```

### BoxList
The `BoxList` class holds a set of bounding boxes (represented as an `Nx4` tensor) for
a specific image, as well as the size of the image as a `(width, height)` tuple.
It also contains a set of methods for performing geometric
transformations on the bounding boxes (such as cropping, scaling and flipping).
The class accepts bounding boxes in two different input formats:
- `xyxy`, where each box is encoded by its `x1`, `y1`, `x2` and `y2` coordinates, and
- `xywh`, where each box is encoded by its `x1`, `y1`, `w` and `h`.

Additionally, each `BoxList` instance can also hold arbitrary additional information
for each bounding box, such as labels, visibility, probability scores etc.

Here is an example of how to create a `BoxList` from a list of coordinates:
```python
import torch

from maskrcnn_benchmark.structures.bounding_box import BoxList, FLIP_LEFT_RIGHT

width = 100
height = 200
boxes = [
  [0, 10, 50, 50],
  [50, 20, 90, 60],
  [10, 10, 50, 50]
]
# create a BoxList with 3 boxes
bbox = BoxList(boxes, image_size=(width, height), mode='xyxy')

# perform some box transformations; the API is similar to PIL.Image
bbox_scaled = bbox.resize((width * 2, height * 3))
bbox_flipped = bbox.transpose(FLIP_LEFT_RIGHT)

# add labels for each bbox
labels = torch.tensor([0, 10, 1])
bbox.add_field('labels', labels)

# bbox also supports a few operations, like indexing
# here, selects boxes 0 and 2
bbox_subset = bbox[[0, 2]]
```
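
Fields added with `add_field` travel with the boxes through indexing, and boxes can
be converted between the two encodings. A minimal sketch, assuming the `get_field`
accessor and the `convert` method from
`maskrcnn_benchmark/structures/bounding_box.py` behave as described above:
```python
import torch

from maskrcnn_benchmark.structures.bounding_box import BoxList

boxes = torch.tensor([[0., 10., 50., 50.], [50., 20., 90., 60.]])
bbox = BoxList(boxes, image_size=(100, 200), mode='xyxy')
bbox.add_field('labels', torch.tensor([1, 2]))

# indexing keeps the matching fields: the subset still carries its own label
subset = bbox[[1]]
print(subset.get_field('labels'))

# switch between the two supported encodings
bbox_xywh = bbox.convert('xywh')
print(bbox_xywh.mode, bbox_xywh.bbox.shape)
```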
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Code of Conduct

Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
Please read the [full text](https://code.fb.com/codeofconduct/)
so that you can understand what actions will and will not be tolerated.
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing to Mask-RCNN Benchmark
We want to make contributing to this project as easy and transparent as
possible.

## Our Development Process
Minor changes and improvements will be released on an ongoing basis. Larger changes (e.g., changesets implementing a new paper) will be released on a more periodic basis.

## Pull Requests
We actively welcome your pull requests.

1. Fork the repo and create your branch from `master`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").

## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.

Complete your CLA here: <https://code.facebook.com/cla>

## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.

Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.

## Coding Style
* 4 spaces for indentation rather than tabs
* 80 character line length
* PEP8 formatting following [Black](https://black.readthedocs.io/en/stable/)

## License
By contributing to Mask-RCNN Benchmark, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.
--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
## Installation

### Requirements:
- PyTorch 1.0 from a nightly release. Installation instructions can be found in https://pytorch.org/get-started/locally/
- torchvision from master
- cocoapi
- yacs
- matplotlib
- GCC >= 4.9
- (optional) OpenCV for the webcam demo


### Step-by-step installation

```bash
# first, make sure that your conda is set up properly with the right environment
# for that, check that `which conda`, `which pip` and `which python` point to the
# right path. From a clean conda env, this is what you need to do

conda create --name maskrcnn_benchmark
source activate maskrcnn_benchmark

# this installs the right pip and dependencies for the fresh python
conda install ipython

# maskrcnn_benchmark and coco api dependencies
pip install ninja yacs cython matplotlib

# follow PyTorch installation in https://pytorch.org/get-started/locally/
# we give the instructions for CUDA 9.0
conda install pytorch-nightly -c pytorch

# install torchvision
cd ~/github
git clone https://github.com/pytorch/vision.git
cd vision
python setup.py install

# install pycocotools
cd ~/github
git clone https://github.com/cocodataset/cocoapi.git
cd cocoapi/PythonAPI
python setup.py build_ext install

# install PyTorch Detection
cd ~/github
git clone https://github.com/facebookresearch/maskrcnn-benchmark.git
cd maskrcnn-benchmark
# the following will install the lib with
# symbolic links, so that you can modify
# the files if you want and won't need to
# re-build it
python setup.py build develop

# or if you are on macOS
# MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop
```
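
After `python setup.py build develop` finishes, a quick sanity check (not part of
the official instructions) is to import the compiled extension;
`maskrcnn_benchmark._C` is the module name the build above is assumed to produce:
```bash
# should print the path of the compiled extension instead of raising ImportError
python -c "import maskrcnn_benchmark._C as C; print(C.__file__)"
```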
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Facebook

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/MODEL_ZOO.md:
--------------------------------------------------------------------------------
## Model Zoo and Baselines

### Hardware
- 8 NVIDIA V100 GPUs

### Software
- PyTorch version: 1.0.0a0+dd2c487
- CUDA 9.2
- CUDNN 7.1
- NCCL 2.2.13-1

### End-to-end Faster and Mask R-CNN baselines

All the baselines were trained using the exact same experimental setup as in Detectron.
We initialize the detection models with ImageNet weights from Caffe2, the same ones used by Detectron.

The pre-trained models are available via the links in the model id column.

backbone | type | lr sched | im / gpu | train mem (GB) | train time (s/iter) | total train time (hr) | inference time (s/im) | box AP | mask AP | model id
-- | -- | -- | -- | -- | -- | -- | -- | -- | -- | --
R-50-C4 | Fast | 1x | 1 | 5.8 | 0.4036 | 20.2 | 0.17130 | 34.8 | - | [6358800](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_50_C4_1x.pth)
R-50-FPN | Fast | 1x | 2 | 4.4 | 0.3530 | 8.8 | 0.12580 | 36.8 | - | [6358793](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_50_FPN_1x.pth)
R-101-FPN | Fast | 1x | 2 | 7.1 | 0.4591 | 11.5 | 0.143149 | 39.1 | - | [6358804](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_101_FPN_1x.pth)
X-101-32x8d-FPN | Fast | 1x | 1 | 7.6 | 0.7007 | 35.0 | 0.209965 | 41.2 | - | [6358717](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_X_101_32x8d_FPN_1x.pth)
R-50-C4 | Mask | 1x | 1 | 5.8 | 0.4520 | 22.6 | 0.17796 + 0.028 | 35.6 | 31.5 | [6358801](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_C4_1x.pth)
R-50-FPN | Mask | 1x | 2 | 5.2 | 0.4536 | 11.3 | 0.12966 + 0.034 | 37.8 | 34.2 | [6358792](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_FPN_1x.pth)
R-101-FPN | Mask | 1x | 2 | 7.9 | 0.5665 | 14.2 | 0.15384 + 0.034 | 40.1 | 36.1 | [6358805](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_101_FPN_1x.pth)
X-101-32x8d-FPN | Mask | 1x | 1 | 7.8 | 0.7562 | 37.8 | 0.21739 + 0.034 | 42.2 | 37.8 | [6358718](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_X_101_32x8d_FPN_1x.pth)


## Comparison with Detectron and mmdetection

In the following section, we compare our implementation with [Detectron](https://github.com/facebookresearch/Detectron)
and [mmdetection](https://github.com/open-mmlab/mmdetection).
The same remarks from [mmdetection](https://github.com/open-mmlab/mmdetection/blob/master/MODEL_ZOO.md#training-speed)
about different hardware apply here.

### Training speed

The numbers here are in seconds / iteration. The lower, the better.

type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 0.566 | - | 0.4036
Faster R-CNN R-50 FPN | 0.544 | 0.554 | 0.3530
Faster R-CNN R-101 FPN | 0.647 | - | 0.4591
Faster R-CNN X-101-32x8d FPN | 0.799 | - | 0.7007
Mask R-CNN R-50 C4 | 0.620 | - | 0.4520
Mask R-CNN R-50 FPN | 0.889 | 0.690 | 0.4536
Mask R-CNN R-101 FPN | 1.008 | - | 0.5665
Mask R-CNN X-101-32x8d FPN | 0.961 | - | 0.7562

### Training memory

The numbers here are in GB per GPU. The lower, the better.

type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 6.3 | - | 5.8
Faster R-CNN R-50 FPN | 7.2 | 4.9 | 4.4
Faster R-CNN R-101 FPN | 8.9 | - | 7.1
Faster R-CNN X-101-32x8d FPN | 7.0 | - | 7.6
Mask R-CNN R-50 C4 | 6.6 | - | 5.8
Mask R-CNN R-50 FPN | 8.6 | 5.9 | 5.2
Mask R-CNN R-101 FPN | 10.2 | - | 7.9
Mask R-CNN X-101-32x8d FPN | 7.7 | - | 7.8

### Accuracy

The numbers here are box AP (and mask AP, where applicable). The higher, the better.

type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 34.8 | - | 34.8
Faster R-CNN R-50 FPN | 36.7 | 36.7 | 36.8
Faster R-CNN R-101 FPN | 39.4 | - | 39.1
Faster R-CNN X-101-32x8d FPN | 41.3 | - | 41.2
Mask R-CNN R-50 C4 | 35.8 & 31.4 | - | 35.6 & 31.5
Mask R-CNN R-50 FPN | 37.7 & 33.9 | 37.5 & 34.4 | 37.8 & 34.2
Mask R-CNN R-101 FPN | 40.0 & 35.9 | - | 40.1 & 36.1
Mask R-CNN X-101-32x8d FPN | 42.1 & 37.3 | - | 42.2 & 37.8
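
The `https://download.pytorch.org/...` checkpoints linked above are fetched and
cached on first use. If you want to pre-download one yourself, a sketch using the
`cache_url` helper from `maskrcnn_benchmark/utils/model_zoo.py` (assuming it
mirrors the `torch.utils.model_zoo` download-and-cache semantics):
```python
from maskrcnn_benchmark.utils.model_zoo import cache_url

# downloads once into the local cache and returns the cached file path
path = cache_url("https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_FPN_1x.pth")
print(path)
```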
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# RetinaMask

The code is based on the [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark).

![alt text](arch.png)


### Citing RetinaMask
Please cite RetinaMask in your publications if it helps your research:

    @inproceedings{fu2019retinamask,
      title = {{RetinaMask}: Learning to predict masks improves state-of-the-art single-shot detection for free},
      author = {Fu, Cheng-Yang and Shvets, Mykhailo and Berg, Alexander C.},
      booktitle = {arXiv preprint arXiv:1901.03353},
      year = {2019}
    }


### Contents
1. [Installation](#installation)
2. [Models](#models)

### Installation
Follow the [maskrcnn-benchmark](./OLD_README.md) instructions to install the code and set up the datasets. Use the config files in ./configs/retina/ for training and testing.


### Models

| Models | BBox | B(time) | Mask | M(time) | Link |
| -------------- | ------------- | ---| ----- | --- | ---- |
| ResNet-50-FPN | 39.4/58.6/42.3/21.9/42.0/51.0 | 0.124 | 34.9/55.7/37.1/15.1/36.7/50.4 | 0.139 | [link](https://drive.google.com/file/d/17QnkNoibgzRnnSeLDFo27LmgPUifFBxz/view?usp=sharing) |
| ResNet-101-FPN | 41.4/60.8/44.6/23.0/44.5/53.5 | 0.145 | 36.6/58.0/39.1/16.2/38.8/52.7 | 0.160 | [link](https://drive.google.com/file/d/1Fl-TI1oDRVBFYI1h1mn7IiEOXwTIR3AC/view?usp=sharing) |
| ResNet-101-FPN-GN | 41.7/61.7/45.0/23.5/44.7/52.8 | 0.153 | 36.7/58.8/39.3/16.4/39.4/52.6 | 0.164 | [link](https://drive.google.com/file/d/19MCU9Q0YXbIrc4fRSPRV53T_RSxiVqh0/view?usp=sharing) |
| ResNeXt32x8d-101-FPN-GN | 42.6/62.5/46.0/24.8/45.6/53.8 | 0.231 | 37.4/59.8/40.0/17.6/39.9/53.4 | 0.270 | [link](https://drive.google.com/file/d/1n-tcTUZ28s0uvDOB_gZmsFtxA8flGxKF/view?usp=sharing) |

P.S. Evaluation metrics: AP, AP50, AP75, AP(small), AP(medium), AP(large); please refer to [COCO](http://cocodataset.org/#detection-eval) for a detailed explanation. The inference time is measured on an Nvidia 1080Ti.


#### Run Inference
Use the following scripts (assuming the models are downloaded to the ./models directory).

Run mask and bbox evaluation:
```
python tools/test_net.py --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml MODEL.WEIGHT ./models/retinanet_mask_R-50-FPN_2x_adjust_std011_ms_model.pth

```
Run bbox only:
```
python tools/test_net.py --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml MODEL.WEIGHT ./models/retinanet_mask_R-50-FPN_2x_adjust_std011_ms_model.pth MODEL.MASK_ON False

```
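
The repository also ships `run_test_R-50_dist.sh` for multi-GPU testing. A sketch
of the equivalent command, assuming the standard `torch.distributed.launch`
pattern inherited from maskrcnn-benchmark (adjust `--nproc_per_node` to your GPU
count):
```bash
export NGPUS=4
python -m torch.distributed.launch --nproc_per_node=$NGPUS tools/test_net.py \
    --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml \
    MODEL.WEIGHT ./models/retinanet_mask_R-50-FPN_2x_adjust_std011_ms_model.pth
```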
--------------------------------------------------------------------------------
/TROUBLESHOOTING.md:
--------------------------------------------------------------------------------
# Troubleshooting

Here is a compilation of common issues that you might face
while compiling / running this code:

## Compilation errors when compiling the library
If you encounter build errors like the following:
```
/usr/include/c++/6/type_traits:1558:8: note: provided for ‘template struct std::is_convertible’
 struct is_convertible
        ^~~~~~~~~~~~~~
/usr/include/c++/6/tuple:502:1: error: body of constexpr function ‘static constexpr bool std::_TC<, _Elements>::_NonNestedTuple() [with _SrcTuple = std::tuple&&; bool = true; _Elements = {at::Tensor, at::Tensor, at::Tensor, at::Tensor}]’ not a return-statement
 }
 ^
error: command '/usr/local/cuda/bin/nvcc' failed with exit status 1
```
check your CUDA version and your `gcc` version.
```
nvcc --version
gcc --version
```
If you are using CUDA 9.0 and gcc 6.4.0, then refer to https://github.com/facebookresearch/maskrcnn-benchmark/issues/25,
which has a summary of the solution. Basically, CUDA 9.0 is not compatible with gcc 6.4.0.

## ImportError: No module named maskrcnn_benchmark.config when running webcam.py

This means that `maskrcnn-benchmark` has not been properly installed.
Refer to https://github.com/facebookresearch/maskrcnn-benchmark/issues/22 for a few possible issues.
Note that we now support Python 2 as well.

## Segmentation fault (core dumped) when running the library
This probably means that you have compiled the library using GCC < 4.9, which is ABI incompatible with PyTorch.
Indeed, during installation, you probably saw a message like
```
Your compiler (g++ 4.8) may be ABI-incompatible with PyTorch!
Please use a compiler that is ABI-compatible with GCC 4.9 and above.
See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html.

See https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
for instructions on how to install GCC 4.9 or higher.
```
Follow the instructions on https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
to install GCC 4.9 or higher, and try recompiling `maskrcnn-benchmark` again, after cleaning the
`build` folder with
```
rm -rf build
```
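
If the newer compiler is installed under a versioned name, one way to point the
rebuild at it is via the `CC`/`CXX` environment variables. This is a sketch only:
the `gcc-4.9`/`g++-4.9` binary names and whether your setuptools toolchain honors
`CC`/`CXX` both depend on your distribution:
```bash
rm -rf build
CC=gcc-4.9 CXX=g++-4.9 python setup.py build develop
```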
--------------------------------------------------------------------------------
/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/arch.png
--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
DATASETS:
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x"
DATASETS:
  TEST: ("coco_2014_minival",)
--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x"
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
DATASETS:
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  RESNETS:
    STRIDE_IN_1X1: False
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
DATASETS:
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  MASK_ON: True
DATASETS:
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x"
  ROI_MASK_HEAD:
    PREDICTOR: "MaskRCNNC4Predictor"
    SHARE_BOX_FEATURE_EXTRACTOR: True
  MASK_ON: True
DATASETS:
  TEST: ("coco_2014_minival",)
--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x"
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  MASK_ON: True
DATASETS:
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  RESNETS:
    STRIDE_IN_1X1: False
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
  MASK_ON: True
DATASETS:
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
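
These caffe2 configs point `MODEL.WEIGHT` at converted Detectron checkpoints and
define only `TEST` datasets, so they are meant for evaluation rather than
training. A usage sketch, assuming the `catalog://` weight is resolved and
downloaded automatically by the checkpoint loader:
```bash
python tools/test_net.py --config-file configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml
```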
"FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TEST: ("coco_2014_minival",) 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | RESNETS: 31 | STRIDE_IN_1X1: False 32 | NUM_GROUPS: 32 33 | WIDTH_PER_GROUP: 8 34 | MASK_ON: True 35 | DATASETS: 36 | TEST: ("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 23 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | SOLVER: 28 | BASE_LR: 0.02 29 | WEIGHT_DECAY: 0.0001 30 | STEPS: (60000, 80000) 31 | MAX_ITER: 90000 32 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | DATASETS: 8 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 9 | TEST: ("coco_2014_minival",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WEIGHT_DECAY: 0.0001 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | IMS_PER_BATCH: 8 16 | -------------------------------------------------------------------------------- 
--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.02
  WEIGHT_DECAY: 0.0001
  STEPS: (60000, 80000)
  MAX_ITER: 90000
--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  RESNETS:
    STRIDE_IN_1X1: False
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.01
  WEIGHT_DECAY: 0.0001
  STEPS: (120000, 160000)
  MAX_ITER: 180000
  IMS_PER_BATCH: 8
--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.02
  WEIGHT_DECAY: 0.0001
  STEPS: (60000, 80000)
  MAX_ITER: 90000
--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN:
    PRE_NMS_TOP_N_TEST: 6000
    POST_NMS_TOP_N_TEST: 1000
  ROI_MASK_HEAD:
    PREDICTOR: "MaskRCNNC4Predictor"
    SHARE_BOX_FEATURE_EXTRACTOR: True
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival",)
SOLVER:
  BASE_LR: 0.01
  WEIGHT_DECAY: 0.0001
  STEPS: (120000, 160000)
  MAX_ITER: 180000
  IMS_PER_BATCH: 8
--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.02
  WEIGHT_DECAY: 0.0001
  STEPS: (60000, 80000)
  MAX_ITER: 90000
--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  RESNETS:
    STRIDE_IN_1X1: False
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.01
  WEIGHT_DECAY: 0.0001
  STEPS: (120000, 160000)
  MAX_ITER: 180000
  IMS_PER_BATCH: 8
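
The solver settings in these e2e configs appear to be the 8-GPU schedules
reported in MODEL_ZOO.md. A launch sketch using the `torch.distributed.launch`
pattern carried over from maskrcnn-benchmark (an assumption; adjust
`--nproc_per_node` and linearly scale the solver options for fewer GPUs):
```bash
export NGPUS=8
python -m torch.distributed.launch --nproc_per_node=$NGPUS tools/train_net.py \
    --config-file configs/e2e_mask_rcnn_R_50_FPN_1x.yaml
```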
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN:
    PRE_NMS_TOP_N_TEST: 6000
    POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    BATCH_SIZE_PER_IMAGE: 256
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 2
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
    BATCH_SIZE_PER_IMAGE: 256
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 4
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
    BATCH_SIZE_PER_IMAGE: 256
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  RESNETS:
    STRIDE_IN_1X1: False
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 2
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN:
    PRE_NMS_TOP_N_TEST: 6000
    POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    BATCH_SIZE_PER_IMAGE: 256
  ROI_MASK_HEAD:
    PREDICTOR: "MaskRCNNC4Predictor"
    SHARE_BOX_FEATURE_EXTRACTOR: True
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 4
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
    BATCH_SIZE_PER_IMAGE: 256
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 4
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
    BATCH_SIZE_PER_IMAGE: 256
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  RESNETS:
    STRIDE_IN_1X1: False
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 2
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN_ONLY: True
  RPN:
    PRE_NMS_TOP_N_TEST: 12000
    POST_NMS_TOP_N_TEST: 2000
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 4
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN_ONLY: True
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 2000
    FPN_POST_NMS_TOP_N_TEST: 2000
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 4
TEST:
  IMS_PER_BATCH: 2
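
The quick_schedules configs train and test on coco_2014_minival for only 2000
iterations; they are smoke tests of the full pipeline rather than real
baselines. A single-GPU usage sketch (illustrative, not an official schedule):
```bash
python tools/train_net.py --config-file configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml
```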
--------------------------------------------------------------------------------
/configs/retina/retinanet_R-101-FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "RetinaNet"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
  RPN_ONLY: True
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    FG_IOU_THRESHOLD: 0.5
    BG_IOU_THRESHOLD: 0.4
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
    BATCH_SIZE_PER_IMAGE: 256
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
INPUT:
  MIN_SIZE_TRAIN: (800, )
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1333
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  # Assume 4 gpus
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (120000, 160000)
  MAX_ITER: 180000
  IMS_PER_BATCH: 8
RETINANET:
  RETINANET_ON: True
  SCALES_PER_OCTAVE: 3
  STRADDLE_THRESH: -1

--------------------------------------------------------------------------------
/configs/retina/retinanet_R-50-FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "RetinaNet"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN_ONLY: True
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    FG_IOU_THRESHOLD: 0.5
    BG_IOU_THRESHOLD: 0.4
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
    BATCH_SIZE_PER_IMAGE: 256
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
INPUT:
  MIN_SIZE_TRAIN: (800,)
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1333
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  # Assume 4 gpus
  BASE_LR: 0.01
  WEIGHT_DECAY: 0.0001
  STEPS: (60000, 80000)
  MAX_ITER: 90000
  IMS_PER_BATCH: 16
RETINANET:
  RETINANET_ON: True
  SCALES_PER_OCTAVE: 3
  STRADDLE_THRESH: -1
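
The `# Assume 4 gpus` comments mean `IMS_PER_BATCH` and `BASE_LR` are tuned for
a 4-GPU run. Any of these keys can be overridden from the command line, since
trailing arguments are merged into the yacs config (the same mechanism the
README uses for `MODEL.WEIGHT`). A 1-GPU sketch with linearly scaled solver
settings (illustrative numbers, not a validated schedule):
```bash
python tools/train_net.py \
    --config-file configs/retina/retinanet_R-50-FPN_1x.yaml \
    SOLVER.IMS_PER_BATCH 4 SOLVER.BASE_LR 0.0025 \
    SOLVER.MAX_ITER 360000 SOLVER.STEPS "(240000, 320000)"
```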
PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | BBOX_REG_BETA: 1.0 48 | SELFADJUST_SMOOTH_L1: True 49 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_adjustl1.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | SELFADJUST_SMOOTH_L1: True 48 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_beta100.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | 
WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | BBOX_REG_BETA: 1.0 48 | SELFADJUST_SMOOTH_L1: False 49 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_low_quality_0.2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | LOW_QUALITY_THRESHOLD: 0.2 48 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_low_quality_0.3.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | LOW_QUALITY_THRESHOLD: 0.3 48 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_low_quality_0.4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | 
FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | LOW_QUALITY_THRESHOLD: 0.4 48 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_no_low_quality.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | LOW_QUALITY_MATCHES: False 48 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_no_low_quality_adjustl1.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | 
MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | LOW_QUALITY_MATCHES: False 48 | SELFADJUST_SMOOTH_L1: True 49 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_400.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (400,) 41 | MAX_SIZE_TRAIN: 667 42 | MIN_SIZE_TEST: 400 43 | MAX_SIZE_TEST: 667 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_500.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: 
("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | #TEST: ("coco_test-dev",) 40 | INPUT: 41 | MIN_SIZE_TRAIN: (500,) 42 | MAX_SIZE_TRAIN: 833 43 | MIN_SIZE_TEST: 500 44 | MAX_SIZE_TEST: 833 45 | DATALOADER: 46 | SIZE_DIVISIBILITY: 32 47 | SOLVER: 48 | # Assume 4 gpus 49 | BASE_LR: 0.005 50 | WEIGHT_DECAY: 0.0001 51 | STEPS: (180000, 240000) 52 | MAX_ITER: 270000 53 | IMS_PER_BATCH: 8 54 | RETINANET: 55 | RETINANET_ON: True 56 | BACKBONE: "p3p7" 57 | SCALES_PER_OCTAVE: 3 58 | STRADDLE_THRESH: -1 59 | NUM_MASKS_TEST: 50 60 | BBOX_REG_BETA: 0.11 61 | SELFADJUST_SMOOTH_L1: True 62 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (600,) 41 | MAX_SIZE_TRAIN: 1000 42 | MIN_SIZE_TEST: 600 43 | MAX_SIZE_TEST: 1000 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_700.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: 
"MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (700,) 41 | MAX_SIZE_TRAIN: 1167 42 | MIN_SIZE_TEST: 700 43 | MAX_SIZE_TEST: 1167 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.0025 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (360000, 480000) 51 | MAX_ITER: 540000 52 | IMS_PER_BATCH: 4 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_800.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800,) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.0025 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (360000, 480000) 51 | MAX_ITER: 540000 52 | IMS_PER_BATCH: 4 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_2x_adjust_std011_ms.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: 
"FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (640, 800, 1000) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.0025 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (480000, 640000) 51 | MAX_ITER: 720000 52 | IMS_PER_BATCH: 4 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_2x_adjust_std011_ms_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | USE_GN: True 37 | DATASETS: 38 | TRAIN: ("coco_2017_train",) 39 | TEST: ("coco_2017_val",) 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 800, 1000) 42 | MAX_SIZE_TRAIN: 1333 43 | MIN_SIZE_TEST: 800 44 | MAX_SIZE_TEST: 1333 45 | DATALOADER: 46 | SIZE_DIVISIBILITY: 32 47 | SOLVER: 48 | # Assume 4 gpus 49 | BASE_LR: 0.005 50 | WEIGHT_DECAY: 0.0001 51 | STEPS: (240000, 320000) 52 | MAX_ITER: 360000 53 | IMS_PER_BATCH: 8 54 | RETINANET: 55 | RETINANET_ON: True 56 | BACKBONE: "p3p7" 57 | SCALES_PER_OCTAVE: 3 58 | STRADDLE_THRESH: -1 59 | NUM_MASKS_TEST: 50 60 | BBOX_REG_BETA: 0.11 61 | SELFADJUST_SMOOTH_L1: True 62 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_1.5x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 
18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800,) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_400.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (400,) 41 | MAX_SIZE_TRAIN: 667 42 | MIN_SIZE_TEST: 400 43 | MAX_SIZE_TEST: 667 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_500.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 
2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (500,) 41 | MAX_SIZE_TRAIN: 833 42 | MIN_SIZE_TEST: 500 43 | MAX_SIZE_TEST: 833 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (600,) 41 | MAX_SIZE_TRAIN: 1000 42 | MIN_SIZE_TEST: 600 43 | MAX_SIZE_TEST: 1000 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_800.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: 
"R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800,) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800,) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (120000, 160000) 51 | MAX_ITER: 180000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | 
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (640, 800, 1000) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (240000, 320000) 51 | MAX_ITER: 360000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_canonical5_1.5x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | CANONICAL_LEVEL: 5 36 | MASK_ON: True 37 | DATASETS: 38 | TRAIN: ("coco_2017_train",) 39 | TEST: ("coco_2017_val",) 40 | INPUT: 41 | MIN_SIZE_TRAIN: (800,) 42 | MAX_SIZE_TRAIN: 1333 43 | MIN_SIZE_TEST: 800 44 | MAX_SIZE_TEST: 1333 45 | DATALOADER: 46 | SIZE_DIVISIBILITY: 32 47 | SOLVER: 48 | # Assume 4 gpus 49 | BASE_LR: 0.005 50 | WEIGHT_DECAY: 0.0001 51 | STEPS: (180000, 240000) 52 | MAX_ITER: 270000 53 | IMS_PER_BATCH: 8 54 | RETINANET: 55 | RETINANET_ON: True 56 | BACKBONE: "p3p7" 57 | SCALES_PER_OCTAVE: 3 58 | STRADDLE_THRESH: -1 59 | NUM_MASKS_TEST: 50 60 | -------------------------------------------------------------------------------- 
/configs/retina/retinanet_mask_R-50-FPN_canonical5_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125, 0.015625) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | CANONICAL_LEVEL: 5 36 | MASK_ON: True 37 | DATASETS: 38 | TRAIN: ("coco_2017_train",) 39 | TEST: ("coco_2017_val",) 40 | INPUT: 41 | MIN_SIZE_TRAIN: (800,) 42 | MAX_SIZE_TRAIN: 1333 43 | MIN_SIZE_TEST: 800 44 | MAX_SIZE_TEST: 1333 45 | DATALOADER: 46 | SIZE_DIVISIBILITY: 32 47 | SOLVER: 48 | # Assume 4 gpus 49 | BASE_LR: 0.005 50 | WEIGHT_DECAY: 0.0001 51 | STEPS: (120000, 160000) 52 | MAX_ITER: 180000 53 | IMS_PER_BATCH: 8 54 | RETINANET: 55 | RETINANET_ON: True 56 | BACKBONE: "p3p7" 57 | SCALES_PER_OCTAVE: 3 58 | STRADDLE_THRESH: -1 59 | NUM_MASKS_TEST: 50 60 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_X-101-32x8d_FPN_2x_adjust_std011_ms_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | RESNETS: 36 | STRIDE_IN_1X1: False 37 | NUM_GROUPS: 32 38 | WIDTH_PER_GROUP: 8 39 | MASK_ON: True 40 | USE_GN: True 41 | DATASETS: 42 | TRAIN: ("coco_2017_train",) 43 | TEST: ("coco_2017_val",) 44 | #TEST: ("coco_test-dev",) 45 | INPUT: 46 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800, 840, 880, 920, 960, 1000) 47 | MAX_SIZE_TRAIN: 1333 48 | MIN_SIZE_TEST: 800 49 | MAX_SIZE_TEST: 1333 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | # Assume 4 gpus 54 | BASE_LR: 0.0025 55 | WEIGHT_DECAY: 0.0001 
56 | STEPS: (480000, 640000) 57 | MAX_ITER: 720000 58 | IMS_PER_BATCH: 4 59 | RETINANET: 60 | RETINANET_ON: True 61 | BACKBONE: "p3p7" 62 | SCALES_PER_OCTAVE: 3 63 | STRADDLE_THRESH: -1 64 | NUM_MASKS_TEST: 50 65 | BBOX_REG_BETA: 0.11 66 | SELFADJUST_SMOOTH_L1: True 67 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_p2p7_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | TRAIN: ("coco_2017_train",) 37 | TEST: ("coco_2017_val",) 38 | INPUT: 39 | MIN_SIZE_TRAIN: (800,) 40 | MAX_SIZE_TRAIN: 1333 41 | MIN_SIZE_TEST: 800 42 | MAX_SIZE_TEST: 1333 43 | DATALOADER: 44 | SIZE_DIVISIBILITY: 32 45 | SOLVER: 46 | # Assume 4 gpus 47 | BASE_LR: 0.005 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (120000, 160000) 50 | MAX_ITER: 180000 51 | IMS_PER_BATCH: 8 52 | RETINANET: 53 | RETINANET_ON: True 54 | BACKBONE: "p2p7" 55 | SCALES_PER_OCTAVE: 3 56 | STRADDLE_THRESH: -1 57 | NUM_MASKS_TEST: 50 58 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | ## Webcam and Jupyter notebook demo 2 | 3 | This folder contains a simple webcam demo that illustrates how you can use `maskrcnn_benchmark` for inference. 
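`predictor.py` in this folder wraps model construction and post-processing in a `COCODemo` class. As a minimal sketch of programmatic use on a single OpenCV image, mirroring what `webcam.py` does per frame (the config and image paths below are placeholders):

```python
import cv2

from maskrcnn_benchmark.config import cfg
from predictor import COCODemo

# load a config; the path is illustrative, any config under ../configs works
cfg.merge_from_file("../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml")

coco_demo = COCODemo(cfg, confidence_threshold=0.7, min_image_size=800)

image = cv2.imread("input.jpg")  # any BGR image read with OpenCV
composite = coco_demo.run_on_opencv_image(image)  # detections drawn on top
cv2.imwrite("output.jpg", composite)
```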
4 | 5 | You can start the webcam demo from this folder using one of the following commands: 6 | ```bash 7 | # by default, it runs on the GPU 8 | # for best results, use min-image-size 800 9 | python webcam.py --min-image-size 800 10 | # can also run it on the CPU 11 | python webcam.py --min-image-size 300 MODEL.DEVICE cpu 12 | # or change the model that you want to use 13 | python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu 14 | # in order to see the probability heatmaps, pass --show-mask-heatmaps 15 | python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu 16 | ``` 17 | -------------------------------------------------------------------------------- /demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png -------------------------------------------------------------------------------- /demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png -------------------------------------------------------------------------------- /demo/webcam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import argparse 3 | import cv2 4 | 5 | from maskrcnn_benchmark.config import cfg 6 | from predictor import COCODemo 7 | 8 | import time 9 | 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo") 13 | parser.add_argument( 14 | "--config-file", 15 | default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml", 16 | metavar="FILE", 17 | help="path to config file", 18 | ) 19 | parser.add_argument( 20 | "--confidence-threshold", 21 | type=float, 22 | default=0.7, 23 | help="Minimum score for the prediction to be shown", 24 | ) 25 | parser.add_argument( 26 | "--min-image-size", 27 | type=int, 28 | default=224, 29 | help="Smallest size of the image to feed to the model. 
" 30 | "Model was trained with 800, which gives best results", 31 | ) 32 | parser.add_argument( 33 | "--show-mask-heatmaps", 34 | dest="show_mask_heatmaps", 35 | help="Show a heatmap probability for the top masks-per-dim masks", 36 | action="store_true", 37 | ) 38 | parser.add_argument( 39 | "--masks-per-dim", 40 | type=int, 41 | default=2, 42 | help="Number of heatmaps per dimension to show", 43 | ) 44 | parser.add_argument( 45 | "opts", 46 | help="Modify model config options using the command-line", 47 | default=None, 48 | nargs=argparse.REMAINDER, 49 | ) 50 | 51 | args = parser.parse_args() 52 | 53 | # load config from file and command-line arguments 54 | cfg.merge_from_file(args.config_file) 55 | cfg.merge_from_list(args.opts) 56 | cfg.freeze() 57 | 58 | # prepare object that handles inference plus adds predictions on top of image 59 | coco_demo = COCODemo( 60 | cfg, 61 | confidence_threshold=args.confidence_threshold, 62 | show_mask_heatmaps=args.show_mask_heatmaps, 63 | masks_per_dim=args.masks_per_dim, 64 | min_image_size=args.min_image_size, 65 | ) 66 | 67 | cam = cv2.VideoCapture(0) 68 | while True: 69 | start_time = time.time() 70 | ret_val, img = cam.read() 71 | composite = coco_demo.run_on_opencv_image(img) 72 | print("Time: {:.2f} s / img".format(time.time() - start_time)) 73 | cv2.imshow("COCO detections", composite) 74 | if cv2.waitKey(1) == 27: 75 | break # esc to quit 76 | cv2.destroyAllWindows() 77 | 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .defaults import _C as cfg 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return 
SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template <typename scalar_t> 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data<uint8_t>(); 30 | auto order = order_t.data<int64_t>(); 31 | auto x1 = x1_t.data<scalar_t>(); 32 | auto y1 = y1_t.data<scalar_t>(); 33 | auto x2 = x2_t.data<scalar_t>(); 34 | auto y2 = y2_t.data<scalar_t>(); 35 | auto areas = areas_t.data<scalar_t>(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel<scalar_t>(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/vision.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor SigmoidFocalLoss_forward_cuda( 7 | const at::Tensor& logits, 8 | const at::Tensor& targets, 9 | const int num_classes, 10 | const float gamma, 11 | const float alpha); 12 | 13 | at::Tensor SigmoidFocalLoss_backward_cuda( 14 | const at::Tensor& logits, 15 | const at::Tensor& targets, 16 | const at::Tensor& d_losses, 17 | const int num_classes, 18 | const float gamma, 19 | const float alpha); 20 | 21 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 22 | const at::Tensor& rois, 23 | const float spatial_scale, 24 | const int pooled_height, 25 | const int pooled_width, 26 | const int sampling_ratio); 27 | 28 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 29 | const at::Tensor& rois, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width, 37 | const int sampling_ratio); 38 | 39 | 40 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(const at::Tensor& input, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width); 45 | 46 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 47 | const at::Tensor& input, 48 | const at::Tensor& rois, 49 | const at::Tensor& argmax, 50 | const float spatial_scale, 51 | const int pooled_height, 52 | const int pooled_width, 53 | const int batch_size, 54 | const int channels, 55 | const int height, 56 | const int width); 57 | 58 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 59 | 60 | 61 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 62 | const int height, 63 | const int width); 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
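// Dispatch note: nms_cuda expects a single [N, 5] tensor with the score
// appended as a fifth column, so the wrapper below concatenates dets and
// scores before the CUDA call, while nms_cpu takes them as separate tensors.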
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "SigmoidFocalLoss.h" 6 | 7 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 8 | m.def("nms", &nms, "non-maximum suppression"); 9 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 10 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 11 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 12 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 13 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 14 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 15 | } 16 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_data_loader 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.structures.image_list import to_image_list 3 | 4 | 5 | class BatchCollator(object): 6 | """ 7 | From a list of samples from the dataset, 8 | returns the batched images and targets. 9 | This should be passed to the DataLoader 10 | """ 11 | 12 | def __init__(self, size_divisible=0): 13 | self.size_divisible = size_divisible 14 | 15 | def __call__(self, batch): 16 | transposed_batch = list(zip(*batch)) 17 | images = to_image_list(transposed_batch[0], self.size_divisible) 18 | targets = transposed_batch[1] 19 | img_ids = transposed_batch[2] 20 | return images, targets, img_ids 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
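[Illustrative aside, not from the repository sources: how BatchCollator above is meant to plug into a torch DataLoader. Here `dataset` stands for any of the dataset classes below, and size_divisible=32 is an assumption matching the usual FPN stride requirement.]

from torch.utils.data import DataLoader

collator = BatchCollator(size_divisible=32)
loader = DataLoader(dataset, batch_size=2, num_workers=4, collate_fn=collator)
images, targets, img_ids = next(iter(loader))  # images is an ImageList padded to a multiple of 32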
2 | from .coco import COCODataset 3 | from .concat_dataset import ConcatDataset 4 | 5 | __all__ = ["COCODataset", "ConcatDataset"] 6 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import torchvision 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask 7 | 8 | 9 | class COCODataset(torchvision.datasets.coco.CocoDetection): 10 | def __init__( 11 | self, ann_file, root, remove_images_without_annotations, transforms=None 12 | ): 13 | super(COCODataset, self).__init__(root, ann_file) 14 | 15 | # sort indices for reproducible results 16 | self.ids = sorted(self.ids) 17 | 18 | # filter images without detection annotations 19 | if remove_images_without_annotations: 20 | self.ids = [ 21 | img_id 22 | for img_id in self.ids 23 | if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 24 | ] 25 | 26 | self.json_category_id_to_contiguous_id = { 27 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 28 | } 29 | self.contiguous_category_id_to_json_id = { 30 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 31 | } 32 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 33 | self.transforms = transforms 34 | 35 | def __getitem__(self, idx): 36 | img, anno = super(COCODataset, self).__getitem__(idx) 37 | 38 | # filter crowd annotations 39 | # TODO might be better to add an extra field 40 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 41 | 42 | boxes = [obj["bbox"] for obj in anno] 43 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes 44 | target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") 45 | 46 | classes = [obj["category_id"] for obj in anno] 47 | classes = [self.json_category_id_to_contiguous_id[c] for c in classes] 48 | classes = torch.tensor(classes) 49 | target.add_field("labels", classes) 50 | 51 | masks = [obj["segmentation"] for obj in anno] 52 | masks = SegmentationMask(masks, img.size) 53 | target.add_field("masks", masks) 54 | 55 | target = target.clip_to_image(remove_empty=True) 56 | 57 | if self.transforms is not None: 58 | img, target = self.transforms(img, target) 59 | 60 | return img, target, idx 61 | 62 | def get_img_info(self, index): 63 | img_id = self.id_to_img_map[index] 64 | img_data = self.coco.imgs[img_id] 65 | return img_data 66 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
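[Illustrative aside, not from the repository sources: a worked example of the category remapping in COCODataset above. COCO's JSON category ids are non-contiguous (e.g. 12 is unused), so they are packed into 1..N, with 0 left for the background class.]

cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13]   # as returned by coco.getCatIds(); note the gap at 12
mapping = {v: i + 1 for i, v in enumerate(cat_ids)}  # same comprehension as in __init__ above
assert mapping[11] == 11 and mapping[13] == 12       # the gap closes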
2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the size of an image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/list_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Simple dataset class that wraps a list of path names 4 | """ 5 | 6 | from PIL import Image 7 | 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList 9 | 10 | 11 | class ListDataset(object): 12 | def __init__(self, image_lists, transforms=None): 13 | self.image_lists = image_lists 14 | self.transforms = transforms 15 | 16 | def __getitem__(self, item): 17 | img = Image.open(self.image_lists[item]).convert("RGB") 18 | 19 | # dummy target 20 | w, h = img.size 21 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 22 | 23 | if self.transforms is not None: 24 | img, target = self.transforms(img, target) 25 | 26 | return img, target 27 | 28 | def __len__(self): 29 | return len(self.image_lists) 30 | 31 | def get_img_info(self, item): 32 | """ 33 | Return the image dimensions for the image, without 34 | loading and pre-processing it 35 | """ 36 | pass 37 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed, 3 | # with a modification in the import to use the deprecated backend 4 | # FIXME remove this once c10d fixes the bug it has 5 | import math 6 | import torch 7 | import torch.distributed.deprecated as dist 8 | from torch.utils.data.sampler import Sampler 9 | 10 | 11 | class DistributedSampler(Sampler): 12 | """Sampler that restricts data loading to a subset of the dataset. 13 | It is especially useful in conjunction with 14 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 15 | process can pass a DistributedSampler instance as a DataLoader sampler, 16 | and load a subset of the original dataset that is exclusive to it. 17 | .. note:: 18 | Dataset is assumed to be of constant size.
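[Illustrative aside, not from the repository sources: a worked example of get_idxs above. For two concatenated datasets of lengths 5 and 3, cumulative_sizes is [5, 8]; global index 6 lands in the second dataset at local index 1.]

import bisect

cumulative_sizes = [5, 8]
idx = 6
dataset_idx = bisect.bisect_right(cumulative_sizes, idx)   # -> 1
sample_idx = idx - cumulative_sizes[dataset_idx - 1]       # -> 6 - 5 = 1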
19 | Arguments: 20 | dataset: Dataset used for sampling. 21 | num_replicas (optional): Number of processes participating in 22 | distributed training. 23 | rank (optional): Rank of the current process within num_replicas. 24 | """ 25 | 26 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 27 | if num_replicas is None: 28 | if not dist.is_available(): 29 | raise RuntimeError("Requires distributed package to be available") 30 | num_replicas = dist.get_world_size() 31 | if rank is None: 32 | if not dist.is_available(): 33 | raise RuntimeError("Requires distributed package to be available") 34 | rank = dist.get_rank() 35 | self.dataset = dataset 36 | self.num_replicas = num_replicas 37 | self.rank = rank 38 | self.epoch = 0 39 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 40 | self.total_size = self.num_samples * self.num_replicas 41 | self.shuffle = shuffle 42 | 43 | def __iter__(self): 44 | if self.shuffle: 45 | # deterministically shuffle based on epoch 46 | g = torch.Generator() 47 | g.manual_seed(self.epoch) 48 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 49 | else: 50 | indices = torch.arange(len(self.dataset)).tolist() 51 | 52 | # add extra samples to make it evenly divisible 53 | indices += indices[: (self.total_size - len(indices))] 54 | assert len(indices) == self.total_size 55 | 56 | # subsample 57 | offset = self.num_samples * self.rank 58 | indices = indices[offset : offset + self.num_samples] 59 | assert len(indices) == self.num_samples 60 | 61 | return iter(indices) 62 | 63 | def __len__(self): 64 | return self.num_samples 65 | 66 | def set_epoch(self, epoch): 67 | self.epoch = epoch 68 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
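[Illustrative aside, not from the repository sources: the padding and sharding arithmetic from DistributedSampler.__iter__ above, worked for a toy case. With 10 samples over 4 replicas, each rank gets ceil(10/4) = 3 indices and the first two indices repeat as padding.]

import math

dataset_len, num_replicas = 10, 4
num_samples = int(math.ceil(dataset_len * 1.0 / num_replicas))   # 3
total_size = num_samples * num_replicas                          # 12
indices = list(range(dataset_len))
indices += indices[: total_size - len(indices)]                  # [0..9, 0, 1]
shards = [indices[r * num_samples:(r + 1) * num_samples] for r in range(num_replicas)]
# shards -> [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]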
2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | 10 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | flip_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 10 | resize = T.MultiScaleResize(min_size, max_size) 11 | else: 12 | min_size = cfg.INPUT.MIN_SIZE_TEST 13 | max_size = cfg.INPUT.MAX_SIZE_TEST 14 | flip_prob = 0 15 | resize = T.Resize(min_size, max_size) 16 | 17 | to_bgr255 = cfg.INPUT.TO_BGR255 18 | normalize_transform = T.Normalize( 19 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 20 | ) 21 | 22 | transform = T.Compose( 23 | [ 24 | resize, 25 | T.RandomHorizontalFlip(flip_prob), 26 | T.ToTensor(), 27 | normalize_transform, 28 | ] 29 | ) 30 | return transform 31 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import random 3 | 4 | import torch 5 | import torchvision 6 | from torchvision.transforms import functional as F 7 | 8 | 9 | class Compose(object): 10 | def __init__(self, transforms): 11 | self.transforms = transforms 12 | 13 | def __call__(self, image, target): 14 | for t in self.transforms: 15 | image, target = t(image, target) 16 | return image, target 17 | 18 | def __repr__(self): 19 | format_string = self.__class__.__name__ + "(" 20 | for t in self.transforms: 21 | format_string += "\n" 22 | format_string += " {0}".format(t) 23 | format_string += "\n)" 24 | return format_string 25 | 26 | 27 | class Resize(object): 28 | def __init__(self, min_size, max_size): 29 | self.min_size = min_size 30 | self.max_size = max_size 31 | 32 | # modified from torchvision to add support for max size 33 | def get_size(self, image_size): 34 | w, h = image_size 35 | size = self.min_size 36 | max_size = self.max_size 37 | if max_size is not None: 38 | min_original_size = float(min((w, h))) 39 | max_original_size = float(max((w, h))) 40 | if max_original_size / min_original_size * size > max_size: 41 | size = int(round(max_size * min_original_size / max_original_size)) 42 | 43 | if (w <= h and w == size) or (h <= w and h == size): 44 | return (h, w) 45 | 46 | if w < h: 47 | ow = size 48 | oh = int(size * h / w) 49 | else: 50 | oh = size 51 | ow = int(size * w / h) 52 | 53 | return (oh, ow) 54 | 55 | def __call__(self, image, target): 56 | size = self.get_size(image.size) 57 | image = F.resize(image, size) 58 | target = target.resize(image.size) 59 | return image, target 60 | 61 | 62 | class MultiScaleResize(object): 63 | def __init__(self, min_sizes, max_size): 64 | self.resizers = [] 65 | for min_size in min_sizes: 66 | self.resizers.append(Resize(min_size, max_size)) 67 | 68 | def __call__(self, image, target): 69 | resizer = random.choice(self.resizers) 70 | image, target = resizer(image, target) 71 | 72 | return image, 
target 73 | 74 | 75 | class RandomHorizontalFlip(object): 76 | def __init__(self, prob=0.5): 77 | self.prob = prob 78 | 79 | def __call__(self, image, target): 80 | if random.random() < self.prob: 81 | image = F.hflip(image) 82 | target = target.transpose(0) 83 | return image, target 84 | 85 | 86 | class ToTensor(object): 87 | def __call__(self, image, target): 88 | return F.to_tensor(image), target 89 | 90 | 91 | class Normalize(object): 92 | def __init__(self, mean, std, to_bgr255=True): 93 | self.mean = mean 94 | self.std = std 95 | self.to_bgr255 = to_bgr255 96 | 97 | def __call__(self, image, target): 98 | if self.to_bgr255: 99 | image = image[[2, 1, 0]] * 255 100 | image = F.normalize(image, mean=self.mean, std=self.std) 101 | return image, target 102 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/trainer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import datetime 3 | import logging 4 | import time 5 | 6 | import torch 7 | from torch.distributed import deprecated as dist 8 | 9 | from maskrcnn_benchmark.utils.comm import get_world_size, get_rank 10 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 11 | 12 | 13 | def reduce_loss_dict(loss_dict): 14 | """ 15 | Reduce the loss dictionary from all processes so that process with rank 16 | 0 has the averaged results. Returns a dict with the same fields as 17 | loss_dict, after reduction. 
18 | """ 19 | world_size = get_world_size() 20 | if world_size < 2: 21 | return loss_dict 22 | with torch.no_grad(): 23 | loss_names = [] 24 | all_losses = [] 25 | for k, v in loss_dict.items(): 26 | loss_names.append(k) 27 | all_losses.append(v) 28 | all_losses = torch.stack(all_losses, dim=0) 29 | dist.reduce(all_losses, dst=0) 30 | if dist.get_rank() == 0: 31 | # only main process gets accumulated, so only divide by 32 | # world_size in this case 33 | all_losses /= world_size 34 | reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} 35 | return reduced_losses 36 | 37 | 38 | def do_train( 39 | model, 40 | data_loader, 41 | optimizer, 42 | scheduler, 43 | checkpointer, 44 | device, 45 | checkpoint_period, 46 | arguments, 47 | ): 48 | logger = logging.getLogger("maskrcnn_benchmark.trainer") 49 | logger.info("Start training") 50 | meters = MetricLogger(delimiter=" ") 51 | max_iter = len(data_loader) 52 | start_iter = arguments["iteration"] 53 | model.train() 54 | start_training_time = time.time() 55 | end = time.time() 56 | for iteration, (images, targets, _) in enumerate(data_loader, start_iter): 57 | data_time = time.time() - end 58 | arguments["iteration"] = iteration 59 | 60 | scheduler.step() 61 | 62 | images = images.to(device) 63 | targets = [target.to(device) for target in targets] 64 | 65 | loss_dict = model(images, targets) 66 | 67 | losses = sum(loss for loss in loss_dict.values()) 68 | 69 | # reduce losses over all GPUs for logging purposes 70 | loss_dict_reduced = reduce_loss_dict(loss_dict) 71 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 72 | meters.update(loss=losses_reduced, **loss_dict_reduced) 73 | 74 | optimizer.zero_grad() 75 | losses.backward() 76 | optimizer.step() 77 | 78 | batch_time = time.time() - end 79 | end = time.time() 80 | meters.update(time=batch_time, data=data_time) 81 | 82 | eta_seconds = meters.time.global_avg * (max_iter - iteration) 83 | eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) 84 | 85 | #if iteration % 20 == 0 or iteration == (max_iter - 1): 86 | if True: 87 | logger.info( 88 | meters.delimiter.join( 89 | [ 90 | "eta: {eta}", 91 | "iter: {iter}", 92 | "{meters}", 93 | "lr: {lr:.6f}", 94 | "max mem: {memory:.0f}", 95 | ] 96 | ).format( 97 | eta=eta_string, 98 | iter=iteration, 99 | meters=str(meters), 100 | lr=optimizer.param_groups[0]["lr"], 101 | memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, 102 | ) 103 | ) 104 | if iteration % checkpoint_period == 0 and iteration > 0: 105 | checkpointer.save("model_{:07d}".format(iteration+1), **arguments) 106 | 107 | checkpointer.save("model_{:07d}".format(iteration), **arguments) 108 | total_training_time = time.time() - start_training_time 109 | total_time_str = str(datetime.timedelta(seconds=total_training_time)) 110 | logger.info( 111 | "Total training time: {} ({:.4f} s / it)".format( 112 | total_time_str, total_training_time / (max_iter) 113 | ) 114 | ) 115 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d 5 | from .misc import Conv2d 6 | from .misc import ConvTranspose2d 7 | from .misc import interpolate 8 | from .nms import nms 9 | from .roi_align import ROIAlign 10 | from .roi_align import roi_align 11 | from .roi_pool import ROIPool 12 | from .roi_pool import roi_pool 13 | from .smooth_l1_loss import smooth_l1_loss, SmoothL1Loss 14 | from .sigmoid_focal_loss import SigmoidFocalLoss 15 | from .adjust_smooth_l1_loss import AdjustSmoothL1Loss 16 | 17 | __all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", 18 | "smooth_l1_loss", "SmoothL1Loss", "Conv2d", "ConvTranspose2d", 19 | "interpolate", "FrozenBatchNorm2d", "SigmoidFocalLoss", 20 | "AdjustSmoothL1Loss"] 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import glob 3 | import os.path 4 | 5 | import torch 6 | 7 | try: 8 | from torch.utils.cpp_extension import load as load_ext 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | except ImportError: 11 | raise ImportError("The cpp layer extensions require PyTorch 0.4 or higher") 12 | 13 | 14 | def _load_C_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | this_dir = os.path.dirname(this_dir) 17 | this_dir = os.path.join(this_dir, "csrc") 18 | 19 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 22 | 23 | source = main_file + source_cpu 24 | 25 | extra_cflags = [] 26 | if torch.cuda.is_available() and CUDA_HOME is not None: 27 | source.extend(source_cuda) 28 | extra_cflags = ["-DWITH_CUDA"] 29 | source = [os.path.join(this_dir, s) for s in source] 30 | extra_include_paths = [this_dir] 31 | return load_ext( 32 | "torchvision", 33 | source, 34 | extra_cflags=extra_cflags, 35 | extra_include_paths=extra_include_paths, 36 | ) 37 | 38 | 39 | _C = _load_C_extensions() 40 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/adjust_smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | import logging 5 | from torch.distributed import deprecated as dist 6 | 7 | class AdjustSmoothL1Loss(nn.Module): 8 | 9 | def __init__(self, num_features, momentum=0.1, beta=1.
/ 9): 10 | super(AdjustSmoothL1Loss, self).__init__() 11 | self.num_features = num_features 12 | self.momentum = momentum 13 | self.beta = beta 14 | self.register_buffer( 15 | 'running_mean', torch.empty(num_features).fill_(beta) 16 | ) 17 | self.register_buffer('running_var', torch.zeros(num_features)) 18 | self.logger = logging.getLogger("maskrcnn_benchmark.trainer") 19 | 20 | def forward(self, inputs, target, size_average=True): 21 | 22 | n = torch.abs(inputs - target) 23 | with torch.no_grad(): 24 | if torch.isnan(n.var(dim=0)).sum().item() == 0: 25 | self.running_mean = self.running_mean.to(n.device) 26 | self.running_mean *= (1 - self.momentum) 27 | self.running_mean += (self.momentum * n.mean(dim=0)) 28 | self.running_var = self.running_var.to(n.device) 29 | self.running_var *= (1 - self.momentum) 30 | self.running_var += (self.momentum * n.var(dim=0)) 31 | 32 | 33 | beta = (self.running_mean - self.running_var) 34 | beta = beta.clamp(max=self.beta, min=1e-3) 35 | 36 | beta = beta.view(-1, self.num_features).to(n.device) 37 | cond = n < beta.expand_as(n) 38 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 39 | if size_average: 40 | return loss.mean() 41 | return loss.sum() 42 | 43 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/batch_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class FrozenBatchNorm2d(nn.Module): 7 | """ 8 | BatchNorm2d where the batch statistics and the affine parameters 9 | are fixed 10 | """ 11 | 12 | def __init__(self, n): 13 | super(FrozenBatchNorm2d, self).__init__() 14 | self.register_buffer("weight", torch.ones(n)) 15 | self.register_buffer("bias", torch.zeros(n)) 16 | self.register_buffer("running_mean", torch.zeros(n)) 17 | self.register_buffer("running_var", torch.ones(n)) 18 | 19 | def forward(self, x): 20 | scale = self.weight * self.running_var.rsqrt() 21 | bias = self.bias - self.running_mean * scale 22 | scale = scale.reshape(1, -1, 1, 1) 23 | bias = bias.reshape(1, -1, 1, 1) 24 | return x * scale + bias 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | helper class that supports empty tensors on some nn functions. 4 | 5 | Ideally, add support directly in PyTorch to empty tensors in 6 | those functions.
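[Illustrative aside, not from the repository sources: a sketch checking that FrozenBatchNorm2d above reproduces an eval-mode BatchNorm2d with eps=0 once the buffers agree; the random statistics are made up for the check.]

import torch
from torch import nn

frozen = FrozenBatchNorm2d(8)
bn = nn.BatchNorm2d(8, eps=0.0).eval()
with torch.no_grad():
    frozen.running_mean.normal_()            # pretend these were loaded from a checkpoint
    frozen.running_var.uniform_(0.5, 1.5)
    bn.running_mean.copy_(frozen.running_mean)
    bn.running_var.copy_(frozen.running_var)
x = torch.randn(2, 8, 4, 4)
assert torch.allclose(frozen(x), bn(x), atol=1e-5)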
7 | 8 | This can be removed once https://github.com/pytorch/pytorch/issues/12013 9 | is implemented 10 | """ 11 | 12 | import math 13 | import torch 14 | from torch.nn.modules.utils import _ntuple 15 | 16 | 17 | class _NewEmptyTensorOp(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, new_shape): 20 | ctx.shape = x.shape 21 | return x.new_empty(new_shape) 22 | 23 | @staticmethod 24 | def backward(ctx, grad): 25 | shape = ctx.shape 26 | return _NewEmptyTensorOp.apply(grad, shape), None 27 | 28 | 29 | 30 | class Conv2d(torch.nn.Conv2d): 31 | def forward(self, x): 32 | if x.numel() > 0: 33 | return super(Conv2d, self).forward(x) 34 | # get output shape 35 | 36 | output_shape = [ 37 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 38 | for i, p, di, k, d in zip( 39 | x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride 40 | ) 41 | ] 42 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape 43 | return _NewEmptyTensorOp.apply(x, output_shape) 44 | 45 | 46 | class ConvTranspose2d(torch.nn.ConvTranspose2d): 47 | def forward(self, x): 48 | if x.numel() > 0: 49 | return super(ConvTranspose2d, self).forward(x) 50 | # get output shape 51 | 52 | output_shape = [ 53 | (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op 54 | for i, p, di, k, d, op in zip( 55 | x.shape[-2:], 56 | self.padding, 57 | self.dilation, 58 | self.kernel_size, 59 | self.stride, 60 | self.output_padding, 61 | ) 62 | ] 63 | output_shape = [x.shape[0], self.bias.shape[0]] + output_shape 64 | return _NewEmptyTensorOp.apply(x, output_shape) 65 | 66 | 67 | def interpolate( 68 | input, size=None, scale_factor=None, mode="nearest", align_corners=None 69 | ): 70 | if input.numel() > 0: 71 | return torch.nn.functional.interpolate( 72 | input, size, scale_factor, mode, align_corners 73 | ) 74 | 75 | def _check_size_scale_factor(dim): 76 | if size is None and scale_factor is None: 77 | raise ValueError("either size or scale_factor should be defined") 78 | if size is not None and scale_factor is not None: 79 | raise ValueError("only one of size or scale_factor should be defined") 80 | if ( 81 | scale_factor is not None 82 | and isinstance(scale_factor, tuple) 83 | and len(scale_factor) != dim 84 | ): 85 | raise ValueError( 86 | "scale_factor shape must match input shape. " 87 | "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) 88 | ) 89 | 90 | def _output_size(dim): 91 | _check_size_scale_factor(dim) 92 | if size is not None: 93 | return size 94 | scale_factors = _ntuple(dim)(scale_factor) 95 | # math.floor might return float in py2.7 96 | return [ 97 | int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) 98 | ] 99 | 100 | output_shape = tuple(_output_size(2)) 101 | output_shape = input.shape[:-2] + output_shape 102 | return _NewEmptyTensorOp.apply(input, output_shape) 103 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from maskrcnn_benchmark import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIAlign(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None 45 | 46 | 47 | roi_align = _ROIAlign.apply 48 | 49 | 50 | class ROIAlign(nn.Module): 51 | def __init__(self, output_size, spatial_scale, sampling_ratio): 52 | super(ROIAlign, self).__init__() 53 | self.output_size = output_size 54 | self.spatial_scale = spatial_scale 55 | self.sampling_ratio = sampling_ratio 56 | 57 | def forward(self, input, rois): 58 | return roi_align( 59 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 60 | ) 61 | 62 | def __repr__(self): 63 | tmpstr = self.__class__.__name__ + "(" 64 | tmpstr += "output_size=" + str(self.output_size) 65 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 66 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 67 | tmpstr += ")" 68 | return tmpstr 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
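[Illustrative aside, not from the repository sources: a minimal forward call of the ROIAlign module above. Each roi row is (batch_index, x1, y1, x2, y2) in input-image coordinates, which spatial_scale projects onto the feature map; the shapes and numbers here are made up.]

import torch

align = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=2)
features = torch.randn(1, 256, 50, 50)                   # a stride-16 feature map
rois = torch.tensor([[0.0, 32.0, 32.0, 160.0, 160.0]])   # one box on image 0
out = align(features, rois)
print(out.shape)                                         # torch.Size([1, 256, 7, 7])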
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from maskrcnn_benchmark import _C 7 | 8 | 9 | class _SigmoidFocalLoss(Function): 10 | @staticmethod 11 | def forward(ctx, logits, targets, num_classes, gamma, alpha): 12 | ctx.save_for_backward(logits, targets) 13 | ctx.num_classes = num_classes 14 | ctx.gamma = gamma 15 | ctx.alpha = alpha 16 | 17 | losses = _C.sigmoid_focalloss_forward( 18 | logits, targets, num_classes, gamma, alpha 19 | ) 20 | return losses 21 | 22 | @staticmethod 23 | @once_differentiable 24 | def backward(ctx, d_loss): 25 | logits, targets = ctx.saved_tensors 26 | num_classes = ctx.num_classes 27 | gamma = ctx.gamma 28 | alpha = ctx.alpha 29 | d_loss = d_loss.contiguous() 30 | d_logits = _C.sigmoid_focalloss_backward( 31 | logits, targets, d_loss, num_classes, gamma, alpha 32 | ) 33 | return d_logits, None, None, None, None 34 | 35 | 36 | sigmoid_focalloss = _SigmoidFocalLoss.apply 37 | 38 | 39 | class SigmoidFocalLoss(nn.Module): 40 | def __init__(self, num_classes, gamma, alpha): 41 | super(SigmoidFocalLoss, self).__init__() 42 | self.num_classes = num_classes 43 | self.gamma = gamma 44 | self.alpha = alpha 45 | 46 | def forward(self, logits, targets): 47 | loss = sigmoid_focalloss( 48 | logits, targets, self.num_classes, self.gamma, self.alpha 49 | ) 50 | return loss.sum() 51 | 52 | def __repr__(self): 53 | tmpstr = self.__class__.__name__ + "(" 54 | tmpstr += "num_classes=" + str(self.num_classes) 55 | tmpstr += ", gamma=" + str(self.gamma) 56 | tmpstr += ", alpha=" +
str(self.alpha) 57 | tmpstr += ")" 58 | return tmpstr 59 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class SmoothL1Loss(torch.nn.Module): 6 | def __init__(self, beta=1. / 9): 7 | super(SmoothL1Loss, self).__init__() 8 | self.beta = beta 9 | 10 | def forward(self, input, target, size_average=True): 11 | return smooth_l1_loss(input, target, self.beta, size_average) 12 | 13 | 14 | # TODO maybe push this to nn? 15 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 16 | """ 17 | very similar to the smooth_l1_loss from pytorch, but with 18 | the extra beta parameter 19 | """ 20 | n = torch.abs(input - target) 21 | cond = n < beta 22 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 23 | if size_average: 24 | return loss.mean() 25 | return loss.sum() 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/maskrcnn_benchmark/modeling/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .backbone import build_backbone 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | 4 | from torch import nn 5 | 6 | from . import fpn as fpn_module 7 | from .
import resnet 8 | 9 | 10 | def build_resnet_backbone(cfg): 11 | body = resnet.ResNet(cfg) 12 | model = nn.Sequential(OrderedDict([("body", body)])) 13 | return model 14 | 15 | 16 | def build_resnet_fpn_backbone(cfg): 17 | body = resnet.ResNet(cfg) 18 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 19 | out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS 20 | fpn = fpn_module.FPN( 21 | in_channels_list=[ 22 | in_channels_stage2, 23 | in_channels_stage2 * 2, 24 | in_channels_stage2 * 4, 25 | in_channels_stage2 * 8, 26 | ], 27 | out_channels=out_channels, 28 | top_blocks=fpn_module.LastLevelMaxPool(), 29 | use_gn=cfg.MODEL.USE_GN 30 | ) 31 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 32 | return model 33 | 34 | 35 | def build_resnet_fpn_p3p7_backbone(cfg): 36 | body = resnet.ResNet(cfg) 37 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 38 | out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS 39 | fpn = fpn_module.FPN( 40 | in_channels_list=[ 41 | 0, 42 | in_channels_stage2 * 2, 43 | in_channels_stage2 * 4, 44 | in_channels_stage2 * 8, 45 | ], 46 | out_channels=out_channels, 47 | top_blocks=fpn_module.LastLevelP6P7(out_channels), 48 | use_gn=cfg.MODEL.USE_GN 49 | ) 50 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 51 | return model 52 | 53 | 54 | _BACKBONES = {"resnet": build_resnet_backbone, 55 | "resnet-fpn": build_resnet_fpn_backbone, 56 | "resnet-fpn-retina": build_resnet_fpn_p3p7_backbone, 57 | } 58 | 59 | 60 | def build_resnet_fpn_p2p7_backbone(cfg): 61 | body = resnet.ResNet(cfg) 62 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 63 | out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS 64 | fpn = fpn_module.FPN( 65 | in_channels_list=[ 66 | in_channels_stage2, 67 | in_channels_stage2 * 2, 68 | in_channels_stage2 * 4, 69 | in_channels_stage2 * 8, 70 | ], 71 | out_channels=out_channels, 72 | top_blocks=fpn_module.LastLevelP6P7(out_channels), 73 | use_gn=cfg.MODEL.USE_GN 74 | ) 75 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 76 | return model 77 | 78 | 79 | _BACKBONES = {"resnet": build_resnet_backbone, 80 | "resnet-fpn": build_resnet_fpn_backbone, 81 | "resnet-fpn-retina": build_resnet_fpn_p3p7_backbone, 82 | } 83 | 84 | 85 | 86 | def build_backbone(cfg): 87 | assert cfg.MODEL.BACKBONE.CONV_BODY.startswith( 88 | "R-" 89 | ), "Only ResNet and ResNeXt models are currently implemented" 90 | # Models using FPN end with "-FPN" 91 | if cfg.MODEL.BACKBONE.CONV_BODY.endswith("-FPN"): 92 | if cfg.RETINANET.RETINANET_ON: 93 | if cfg.RETINANET.BACKBONE == "p3p7": 94 | return build_resnet_fpn_p3p7_backbone(cfg) 95 | elif cfg.RETINANET.BACKBONE == "p2p7": 96 | return build_resnet_fpn_p2p7_backbone(cfg) 97 | else: 98 | raise Exception("Wrong Setting {}:{}".format( 99 | 'cfg.RETINANET.BACKBONE', cfg.RETINANET.BACKBONE)) 100 | else: 101 | return build_resnet_fpn_backbone(cfg) 102 | return build_resnet_backbone(cfg) 103 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
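[Illustrative aside, not from the repository sources: the channel arithmetic behind in_channels_list in the FPN builders above, worked for ResNet-50, where RES2_OUT_CHANNELS is 256.]

in_channels_stage2 = 256
in_channels_list = [in_channels_stage2 * f for f in (1, 2, 4, 8)]
# -> [256, 512, 1024, 2048], the C2-C5 outputs the FPN laterals consume;
# each is projected to cfg.MODEL.BACKBONE.OUT_CHANNELS (typically 256).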
2 | import torch 3 | 4 | 5 | class BalancedPositiveNegativeSampler(object): 6 | """ 7 | This class samples batches, ensuring that they contain a fixed proportion of positives 8 | """ 9 | 10 | def __init__(self, batch_size_per_image, positive_fraction): 11 | """ 12 | Arguments: 13 | batch_size_per_image (int): number of elements to be selected per image 14 | positive_fraction (float): percentage of positive elements per batch 15 | """ 16 | self.batch_size_per_image = batch_size_per_image 17 | self.positive_fraction = positive_fraction 18 | 19 | def __call__(self, matched_idxs): 20 | """ 21 | Arguments: 22 | matched_idxs: list of tensors containing -1, 0 or positive values. 23 | Each tensor corresponds to a specific image. 24 | -1 values are ignored, 0 are considered as negatives and > 0 as 25 | positives. 26 | 27 | Returns: 28 | pos_idx (list[tensor]) 29 | neg_idx (list[tensor]) 30 | 31 | Returns two lists of binary masks for each image. 32 | The first list contains the positive elements that were selected, 33 | and the second list the negative examples. 34 | """ 35 | pos_idx = [] 36 | neg_idx = [] 37 | for matched_idxs_per_image in matched_idxs: 38 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 39 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 40 | 41 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = self.batch_size_per_image - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx_per_image = positive[perm1] 53 | neg_idx_per_image = negative[perm2] 54 | 55 | # create binary mask from indices 56 | pos_idx_per_image_mask = torch.zeros_like( 57 | matched_idxs_per_image, dtype=torch.uint8 58 | ) 59 | neg_idx_per_image_mask = torch.zeros_like( 60 | matched_idxs_per_image, dtype=torch.uint8 61 | ) 62 | pos_idx_per_image_mask[pos_idx_per_image] = 1 63 | neg_idx_per_image_mask[neg_idx_per_image] = 1 64 | 65 | pos_idx.append(pos_idx_per_image_mask) 66 | neg_idx.append(neg_idx_per_image_mask) 67 | 68 | return pos_idx, neg_idx 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class BoxCoder(object): 8 | """ 9 | This class encodes and decodes a set of bounding boxes into 10 | the representation used for training the regressors. 11 | """ 12 | 13 | def __init__(self, weights, bbox_xform_clip=math.log(1000.
/ 16)): 14 | """ 15 | Arguments: 16 | weights (4-element tuple) 17 | bbox_xform_clip (float) 18 | """ 19 | self.weights = weights 20 | self.bbox_xform_clip = bbox_xform_clip 21 | 22 | def encode(self, reference_boxes, proposals): 23 | """ 24 | Encode a set of proposals with respect to some 25 | reference boxes 26 | 27 | Arguments: 28 | reference_boxes (Tensor): reference boxes 29 | proposals (Tensor): boxes to be encoded 30 | """ 31 | 32 | TO_REMOVE = 1 # TODO remove 33 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 34 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 35 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 36 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 37 | 38 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 39 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 40 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 41 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 42 | 43 | wx, wy, ww, wh = self.weights 44 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 45 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 46 | targets_dw = ww * torch.log(gt_widths / ex_widths) 47 | targets_dh = wh * torch.log(gt_heights / ex_heights) 48 | 49 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 50 | return targets 51 | 52 | def decode(self, rel_codes, boxes): 53 | """ 54 | From a set of original boxes and encoded relative box offsets, 55 | get the decoded boxes. 56 | 57 | Arguments: 58 | rel_codes (Tensor): encoded boxes 59 | boxes (Tensor): reference boxes. 60 | """ 61 | 62 | boxes = boxes.to(rel_codes.dtype) 63 | 64 | TO_REMOVE = 1 # TODO remove 65 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 66 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 67 | ctr_x = boxes[:, 0] + 0.5 * widths 68 | ctr_y = boxes[:, 1] + 0.5 * heights 69 | 70 | wx, wy, ww, wh = self.weights 71 | dx = rel_codes[:, 0::4] / wx 72 | dy = rel_codes[:, 1::4] / wy 73 | dw = rel_codes[:, 2::4] / ww 74 | dh = rel_codes[:, 3::4] / wh 75 | 76 | # Prevent sending too large values into torch.exp() 77 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 78 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 79 | 80 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 81 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 82 | pred_w = torch.exp(dw) * widths[:, None] 83 | pred_h = torch.exp(dh) * heights[:, None] 84 | 85 | pred_boxes = torch.zeros_like(rel_codes) 86 | # x1 87 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 88 | # y1 89 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 90 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 91 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 92 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 93 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 94 | 95 | return pred_boxes 96 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .detectors import build_detection_model 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/detectors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
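[Illustrative aside, not from the repository sources: an encode/decode round trip with the BoxCoder above; weights (10, 10, 5, 5) are the values commonly used for R-CNN box heads, and the boxes are made up.]

import torch

coder = BoxCoder(weights=(10.0, 10.0, 5.0, 5.0))
proposals = torch.tensor([[0.0, 0.0, 99.0, 99.0]])
gt = torch.tensor([[10.0, 10.0, 89.0, 89.0]])
deltas = coder.encode(gt, proposals)           # (dx, dy, dw, dh), scaled by the weights
decoded = coder.decode(deltas, proposals)
assert torch.allclose(decoded, gt, atol=1e-4)  # the +1/-1 conventions cancel exactly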
2 | from .generalized_rcnn import GeneralizedRCNN 3 | from .retinanet import RetinaNet 4 | 5 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN, 6 | "RetinaNet": RetinaNet} 7 | 8 | 9 | def build_detection_model(cfg): 10 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 11 | return meta_arch(cfg) 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/generalized_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Implements the Generalized R-CNN framework 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | 9 | from maskrcnn_benchmark.structures.image_list import to_image_list 10 | 11 | from ..backbone import build_backbone 12 | from ..rpn.rpn import build_rpn 13 | from ..rpn.retinanet import build_retinanet 14 | from ..roi_heads.roi_heads import build_roi_heads 15 | 16 | 17 | class GeneralizedRCNN(nn.Module): 18 | """ 19 | Main class for Generalized R-CNN. Currently supports boxes and masks. 20 | It consists of three main parts: 21 | - backbone 22 | - rpn 23 | - heads: takes the features + the proposals from the RPN and computes 24 | detections / masks from it. 25 | """ 26 | 27 | def __init__(self, cfg): 28 | super(GeneralizedRCNN, self).__init__() 29 | 30 | self.backbone = build_backbone(cfg) 31 | if not cfg.RETINANET.RETINANET_ON: 32 | self.rpn = build_rpn(cfg) 33 | else: 34 | self.rpn = build_retinanet(cfg) 35 | self.roi_heads = build_roi_heads(cfg) 36 | 37 | def forward(self, images, targets=None): 38 | """ 39 | Arguments: 40 | images (list[Tensor] or ImageList): images to be processed 41 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 42 | 43 | Returns: 44 | result (list[BoxList] or dict[Tensor]): the output from the model. 45 | During training, it returns a dict[Tensor] which contains the losses. 46 | During testing, it returns a list[BoxList] that contains additional fields 47 | like `scores`, `labels` and `mask` (for Mask R-CNN models).
48 | 49 | """ 50 | if self.training and targets is None: 51 | raise ValueError("In training mode, targets should be passed") 52 | images = to_image_list(images) 53 | features = self.backbone(images.tensors) 54 | proposals, proposal_losses = self.rpn(images, features, targets) 55 | if self.roi_heads: 56 | x, result, detector_losses = self.roi_heads(features, proposals, targets) 57 | else: 58 | # RPN-only models don't have roi_heads 59 | x = features 60 | result = proposals 61 | detector_losses = {} 62 | 63 | if self.training: 64 | losses = {} 65 | losses.update(detector_losses) 66 | losses.update(proposal_losses) 67 | return losses 68 | 69 | return result 70 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/maskrcnn_benchmark/modeling/roi_heads/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 6 | from .roi_box_predictors import make_roi_box_predictor 7 | from .inference import make_roi_box_post_processor 8 | from .loss import make_roi_box_loss_evaluator 9 | 10 | 11 | class ROIBoxHead(torch.nn.Module): 12 | """ 13 | Generic Box Head class. 14 | """ 15 | 16 | def __init__(self, cfg): 17 | super(ROIBoxHead, self).__init__() 18 | self.feature_extractor = make_roi_box_feature_extractor(cfg) 19 | self.predictor = make_roi_box_predictor(cfg) 20 | self.post_processor = make_roi_box_post_processor(cfg) 21 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 22 | 23 | def forward(self, features, proposals, targets=None): 24 | """ 25 | Arguments: 26 | features (list[Tensor]): feature-maps from possibly several levels 27 | proposals (list[BoxList]): proposal boxes 28 | targets (list[BoxList], optional): the ground-truth targets. 29 | 30 | Returns: 31 | x (Tensor): the result of the feature extractor 32 | proposals (list[BoxList]): during training, the subsampled proposals 33 | are returned. During testing, the predicted boxlists are returned 34 | losses (dict[Tensor]): During training, returns the losses for the 35 | head. During testing, returns an empty dict. 36 | """ 37 | 38 | if self.training: 39 | # Faster R-CNN subsamples during training the proposals with a fixed 40 | # positive / negative ratio 41 | with torch.no_grad(): 42 | proposals = self.loss_evaluator.subsample(proposals, targets) 43 | 44 | # extract features that will be fed to the final classifier. 
The 45 | # feature_extractor generally corresponds to the pooler + heads 46 | x = self.feature_extractor(features, proposals) 47 | # final classifier that converts the features into predictions 48 | class_logits, box_regression = self.predictor(x) 49 | 50 | if not self.training: 51 | result = self.post_processor((class_logits, box_regression), proposals) 52 | return x, result, {} 53 | 54 | loss_classifier, loss_box_reg = self.loss_evaluator( 55 | [class_logits], [box_regression] 56 | ) 57 | return ( 58 | x, 59 | proposals, 60 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 61 | ) 62 | 63 | 64 | def build_roi_box_head(cfg): 65 | """ 66 | Constructs a new box head. 67 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 68 | and make it a parameter in the config 69 | """ 70 | return ROIBoxHead(cfg) 71 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.modeling.backbone import resnet 6 | from maskrcnn_benchmark.modeling.poolers import Pooler 7 | 8 | 9 | class ResNet50Conv5ROIFeatureExtractor(nn.Module): 10 | def __init__(self, config): 11 | super(ResNet50Conv5ROIFeatureExtractor, self).__init__() 12 | 13 | resolution = config.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 14 | scales = config.MODEL.ROI_BOX_HEAD.POOLER_SCALES 15 | sampling_ratio = config.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 16 | pooler = Pooler( 17 | output_size=(resolution, resolution), 18 | scales=scales, 19 | sampling_ratio=sampling_ratio, 20 | ) 21 | 22 | stage = resnet.StageSpec(index=4, block_count=3, return_features=False) 23 | head = resnet.ResNetHead( 24 | block_module=config.MODEL.RESNETS.TRANS_FUNC, 25 | stages=(stage,), 26 | num_groups=config.MODEL.RESNETS.NUM_GROUPS, 27 | width_per_group=config.MODEL.RESNETS.WIDTH_PER_GROUP, 28 | stride_in_1x1=config.MODEL.RESNETS.STRIDE_IN_1X1, 29 | stride_init=None, 30 | res2_out_channels=config.MODEL.RESNETS.RES2_OUT_CHANNELS, 31 | ) 32 | 33 | self.pooler = pooler 34 | self.head = head 35 | 36 | def forward(self, x, proposals): 37 | x = self.pooler(x, proposals) 38 | x = self.head(x) 39 | return x 40 | 41 | 42 | class FPN2MLPFeatureExtractor(nn.Module): 43 | """ 44 | Heads for FPN for classification 45 | """ 46 | 47 | def __init__(self, cfg): 48 | super(FPN2MLPFeatureExtractor, self).__init__() 49 | 50 | resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 51 | scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES 52 | sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 53 | pooler = Pooler( 54 | output_size=(resolution, resolution), 55 | scales=scales, 56 | sampling_ratio=sampling_ratio, 57 | ) 58 | input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS * resolution ** 2 59 | representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM 60 | self.pooler = pooler 61 | self.fc6 = nn.Linear(input_size, representation_size) 62 | self.fc7 = nn.Linear(representation_size, representation_size) 63 | 64 | for l in [self.fc6, self.fc7]: 65 | # Caffe2 implementation uses XavierFill, which in fact 66 | # corresponds to kaiming_uniform_ in PyTorch 67 | nn.init.kaiming_uniform_(l.weight, a=1) 68 | nn.init.constant_(l.bias, 0) 69 | 70 | def forward(self, x, proposals): 71 | x = 
self.pooler(x, proposals) 72 | x = x.view(x.size(0), -1) 73 | 74 | x = F.relu(self.fc6(x)) 75 | x = F.relu(self.fc7(x)) 76 | 77 | return x 78 | 79 | 80 | _ROI_BOX_FEATURE_EXTRACTORS = { 81 | "ResNet50Conv5ROIFeatureExtractor": ResNet50Conv5ROIFeatureExtractor, 82 | "FPN2MLPFeatureExtractor": FPN2MLPFeatureExtractor, 83 | } 84 | 85 | 86 | def make_roi_box_feature_extractor(cfg): 87 | func = _ROI_BOX_FEATURE_EXTRACTORS[cfg.MODEL.ROI_BOX_HEAD.FEATURE_EXTRACTOR] 88 | return func(cfg) 89 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | 4 | 5 | class FastRCNNPredictor(nn.Module): 6 | def __init__(self, config, pretrained=None): 7 | super(FastRCNNPredictor, self).__init__() 8 | 9 | stage_index = 4 10 | stage2_relative_factor = 2 ** (stage_index - 1) 11 | res2_out_channels = config.MODEL.RESNETS.RES2_OUT_CHANNELS 12 | num_inputs = res2_out_channels * stage2_relative_factor 13 | 14 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7) 16 | self.cls_score = nn.Linear(num_inputs, num_classes) 17 | self.bbox_pred = nn.Linear(num_inputs, num_classes * 4) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | cls_logit = self.cls_score(x) 29 | bbox_pred = self.bbox_pred(x) 30 | return cls_logit, bbox_pred 31 | 32 | 33 | class FPNPredictor(nn.Module): 34 | def __init__(self, cfg): 35 | super(FPNPredictor, self).__init__() 36 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 37 | representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM 38 | 39 | self.cls_score = nn.Linear(representation_size, num_classes) 40 | self.bbox_pred = nn.Linear(representation_size, num_classes * 4) 41 | 42 | nn.init.normal_(self.cls_score.weight, std=0.01) 43 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 44 | for l in [self.cls_score, self.bbox_pred]: 45 | nn.init.constant_(l.bias, 0) 46 | 47 | def forward(self, x): 48 | scores = self.cls_score(x) 49 | bbox_deltas = self.bbox_pred(x) 50 | 51 | return scores, bbox_deltas 52 | 53 | 54 | _ROI_BOX_PREDICTOR = { 55 | "FastRCNNPredictor": FastRCNNPredictor, 56 | "FPNPredictor": FPNPredictor, 57 | } 58 | 59 | 60 | def make_roi_box_predictor(cfg): 61 | func = _ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 62 | return func(cfg) 63 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
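keep_only_positive_boxes below keeps, per image, only the proposals whose `labels` field is positive, since mask targets exist only for foreground boxes. A minimal standalone sketch of that filtering on plain tensors instead of BoxList objects (the box and label values here are made up):

import torch

# hypothetical per-image proposals: four boxes and their matched class labels,
# where label 0 denotes background
boxes = torch.tensor([[0., 0., 10., 10.],
                      [5., 5., 20., 20.],
                      [1., 2., 3., 4.],
                      [8., 8., 30., 30.]])
labels = torch.tensor([0, 3, 0, 7])

inds_mask = labels > 0                 # boolean mask over the proposals
inds = inds_mask.nonzero().squeeze(1)  # -> tensor([1, 3])
positive_boxes = boxes[inds]           # only the two foreground boxes survive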
2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | 7 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 8 | from .roi_mask_predictors import make_roi_mask_predictor 9 | from .inference import make_roi_mask_post_processor 10 | from .loss import make_roi_mask_loss_evaluator 11 | 12 | 13 | def keep_only_positive_boxes(boxes): 14 | """ 15 | Given a set of BoxList containing the `labels` field, 16 | return a set of BoxList for which `labels > 0`. 17 | 18 | Arguments: 19 | boxes (list of BoxList) 20 | """ 21 | assert isinstance(boxes, (list, tuple)) 22 | assert isinstance(boxes[0], BoxList) 23 | assert boxes[0].has_field("labels") 24 | positive_boxes = [] 25 | positive_inds = [] 26 | num_boxes = 0 27 | for boxes_per_image in boxes: 28 | labels = boxes_per_image.get_field("labels") 29 | inds_mask = labels > 0 30 | inds = inds_mask.nonzero().squeeze(1) 31 | positive_boxes.append(boxes_per_image[inds]) 32 | positive_inds.append(inds_mask) 33 | return positive_boxes, positive_inds 34 | 35 | 36 | class ROIMaskHead(torch.nn.Module): 37 | def __init__(self, cfg): 38 | super(ROIMaskHead, self).__init__() 39 | self.cfg = cfg.clone() 40 | self.feature_extractor = make_roi_mask_feature_extractor(cfg) 41 | self.predictor = make_roi_mask_predictor(cfg) 42 | self.post_processor = make_roi_mask_post_processor(cfg) 43 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 44 | 45 | def forward(self, features, proposals, targets=None): 46 | """ 47 | Arguments: 48 | features (list[Tensor]): feature-maps from possibly several levels 49 | proposals (list[BoxList]): proposal boxes 50 | targets (list[BoxList], optional): the ground-truth targets. 51 | 52 | Returns: 53 | x (Tensor): the result of the feature extractor 54 | proposals (list[BoxList]): during training, the original proposals 55 | are returned. During testing, the predicted boxlists are returned 56 | with the `mask` field set 57 | losses (dict[Tensor]): During training, returns the losses for the 58 | head. During testing, returns an empty dict. 59 | """ 60 | 61 | if self.training: 62 | # during training, only focus on positive boxes 63 | all_proposals = proposals 64 | proposals, positive_inds = keep_only_positive_boxes(proposals) 65 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 66 | x = features 67 | x = x[torch.cat(positive_inds, dim=0)] 68 | else: 69 | x = self.feature_extractor(features, proposals) 70 | mask_logits = self.predictor(x) 71 | 72 | if not self.training: 73 | result = self.post_processor(mask_logits, proposals) 74 | return x, result, {} 75 | 76 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 77 | 78 | return x, all_proposals, dict(loss_mask=loss_mask) 79 | 80 | 81 | def build_roi_mask_head(cfg): 82 | return ROIMaskHead(cfg) 83 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
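MaskRCNNFPNFeatureExtractor below builds its conv tower dynamically: the layer widths come from cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS, each conv is registered with add_module so its parameters are tracked, and forward walks the stored names with getattr. A stripped-down sketch of the same registration pattern (ConvTower and its widths are hypothetical, not part of this codebase):

import torch
from torch import nn
from torch.nn import functional as F

class ConvTower(nn.Module):
    def __init__(self, in_channels=256, widths=(256, 256)):
        super(ConvTower, self).__init__()
        self.blocks = []
        next_feature = in_channels
        for idx, width in enumerate(widths, 1):
            name = "conv{}".format(idx)
            # add_module registers the layer under a computed name, so
            # .parameters() and .state_dict() still see it
            self.add_module(name, nn.Conv2d(next_feature, width, 3, stride=1, padding=1))
            self.blocks.append(name)
            next_feature = width

    def forward(self, x):
        for name in self.blocks:
            x = F.relu(getattr(self, name)(x))
        return x

out = ConvTower()(torch.randn(2, 256, 14, 14))  # -> shape (2, 256, 14, 14)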
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 6 | from maskrcnn_benchmark.modeling.poolers import Pooler 7 | from maskrcnn_benchmark.layers import Conv2d 8 | 9 | 10 | class MaskRCNNFPNFeatureExtractor(nn.Module): 11 | """ 12 | Feature extractor for the FPN mask head: an ROI pooler followed by a stack of 3x3 convolutions 13 | """ 14 | 15 | def __init__(self, cfg): 16 | """ 17 | Arguments: 18 | cfg: the configuration node; the pooler settings and the conv 19 | layer widths are read from cfg.MODEL.ROI_MASK_HEAD 20 | (POOLER_*, CONV_LAYERS) 21 | """ 22 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 23 | 24 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 25 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 26 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 27 | pooler = Pooler( 28 | output_size=(resolution, resolution), 29 | scales=scales, 30 | sampling_ratio=sampling_ratio, 31 | canonical_level=cfg.MODEL.ROI_MASK_HEAD.CANONICAL_LEVEL, 32 | ) 33 | input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS 34 | self.pooler = pooler 35 | 36 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 37 | 38 | next_feature = input_size 39 | self.blocks = [] 40 | for layer_idx, layer_features in enumerate(layers, 1): 41 | layer_name = "mask_fcn{}".format(layer_idx) 42 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 43 | # Caffe2 implementation uses MSRAFill, which in fact 44 | # corresponds to kaiming_normal_ in PyTorch 45 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 46 | nn.init.constant_(module.bias, 0) 47 | self.add_module(layer_name, module) 48 | next_feature = layer_features 49 | self.blocks.append(layer_name) 50 | 51 | def forward(self, x, proposals): 52 | x = self.pooler(x, proposals) 53 | 54 | for layer_name in self.blocks: 55 | x = F.relu(getattr(self, layer_name)(x)) 56 | 57 | return x 58 | 59 | 60 | _ROI_MASK_FEATURE_EXTRACTORS = { 61 | "ResNet50Conv5ROIFeatureExtractor": ResNet50Conv5ROIFeatureExtractor, 62 | "MaskRCNNFPNFeatureExtractor": MaskRCNNFPNFeatureExtractor, 63 | } 64 | 65 | 66 | def make_roi_mask_feature_extractor(cfg): 67 | func = _ROI_MASK_FEATURE_EXTRACTORS[cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR] 68 | return func(cfg) 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
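MaskRCNNC4Predictor below turns the extractor output into per-class mask logits: a single 2x transposed-convolution upsample followed by a 1x1 conv with one output channel per class. A shape-only sketch, assuming the common values of 256 feature channels, 81 COCO classes and 14x14 pooled ROIs (these numbers come from typical configs, not from this file):

import torch
from torch import nn

upsample = nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0)
mask_logits = nn.Conv2d(256, 81, kernel_size=1, stride=1, padding=0)

x = torch.randn(8, 256, 14, 14)  # 8 ROIs worth of pooled features
x = torch.relu(upsample(x))      # -> (8, 256, 28, 28): 2x spatial upsampling
logits = mask_logits(x)          # -> (8, 81, 28, 28): one mask map per class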
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | from maskrcnn_benchmark.layers import ConvTranspose2d 7 | 8 | 9 | class MaskRCNNC4Predictor(nn.Module): 10 | def __init__(self, cfg): 11 | super(MaskRCNNC4Predictor, self).__init__() 12 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 13 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 14 | 15 | if cfg.MODEL.ROI_HEADS.USE_FPN: 16 | num_inputs = dim_reduced 17 | else: 18 | stage_index = 4 19 | stage2_relative_factor = 2 ** (stage_index - 1) 20 | res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 21 | num_inputs = res2_out_channels * stage2_relative_factor 22 | 23 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 24 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 25 | 26 | for name, param in self.named_parameters(): 27 | if "bias" in name: 28 | nn.init.constant_(param, 0) 29 | elif "weight" in name: 30 | # Caffe2 implementation uses MSRAFill, which in fact 31 | # corresponds to kaiming_normal_ in PyTorch 32 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 33 | 34 | def forward(self, x): 35 | x = F.relu(self.conv5_mask(x)) 36 | return self.mask_fcn_logits(x) 37 | 38 | 39 | _ROI_MASK_PREDICTOR = {"MaskRCNNC4Predictor": MaskRCNNC4Predictor} 40 | 41 | 42 | def make_roi_mask_predictor(cfg): 43 | func = _ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 44 | return func(cfg) 45 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/roi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .box_head.box_head import build_roi_box_head 5 | from .mask_head.mask_head import build_roi_mask_head 6 | 7 | 8 | class CombinedROIHeads(torch.nn.ModuleDict): 9 | """ 10 | Combines a set of individual heads (for box prediction or masks) into a single 11 | head. 
12 | """ 13 | 14 | def __init__(self, cfg, heads): 15 | super(CombinedROIHeads, self).__init__(heads) 16 | self.cfg = cfg.clone() 17 | if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 18 | self.mask.feature_extractor = self.box.feature_extractor 19 | 20 | def forward(self, features, proposals, targets=None): 21 | losses = {} 22 | # TODO rename x to roi_box_features, if it doesn't increase memory consumption 23 | x, detections, loss_box = self.box(features, proposals, targets) 24 | losses.update(loss_box) 25 | if self.cfg.MODEL.MASK_ON: 26 | mask_features = features 27 | # optimization: during training, if we share the feature extractor between 28 | # the box and the mask heads, then we can reuse the features already computed 29 | if ( 30 | self.training 31 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 32 | ): 33 | mask_features = x 34 | # During training, self.box() returns the unaltered proposals as "detections"; 35 | # this keeps the API consistent between training and testing 36 | x, detections, loss_mask = self.mask(mask_features, detections, targets) 37 | losses.update(loss_mask) 38 | return x, detections, losses 39 | 40 | 41 | def build_roi_heads(cfg): 42 | # individually create the heads that will be combined together 43 | # afterwards 44 | roi_heads = [] 45 | if not cfg.MODEL.RPN_ONLY: 46 | roi_heads.append(("box", build_roi_box_head(cfg))) 47 | if cfg.MODEL.MASK_ON: 48 | roi_heads.append(("mask", build_roi_mask_head(cfg))) 49 | 50 | # combine individual heads in a single module 51 | if roi_heads: 52 | roi_heads = CombinedROIHeads(cfg, roi_heads) 53 | 54 | return roi_heads 55 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_optimizer 3 | from .build import make_lr_scheduler 4 | from .lr_scheduler import WarmupMultiStepLR 5 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
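make_optimizer below hands SGD one parameter group per tensor, so biases can get a scaled learning rate (SOLVER.BIAS_LR_FACTOR) and their own weight decay (SOLVER.WEIGHT_DECAY_BIAS). A minimal sketch of the same grouping on a toy module, with illustrative values hard-coded in place of the config:

import torch
from torch import nn

model = nn.Conv2d(3, 8, 3)                # toy module with one weight and one bias
base_lr, weight_decay = 0.0025, 0.0001    # illustrative values, not the repo defaults
bias_lr_factor, weight_decay_bias = 2, 0  # bias-specific overrides

params = []
for key, value in model.named_parameters():
    lr, wd = base_lr, weight_decay
    if "bias" in key:
        lr, wd = base_lr * bias_lr_factor, weight_decay_bias
    # one group per tensor, each carrying its own lr / weight decay
    params.append({"params": [value], "lr": lr, "weight_decay": wd})

optimizer = torch.optim.SGD(params, base_lr, momentum=0.9)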
2 | import torch 3 | 4 | from .lr_scheduler import WarmupMultiStepLR 5 | 6 | 7 | def make_optimizer(cfg, model): 8 | params = [] 9 | for key, value in model.named_parameters(): 10 | if not value.requires_grad: 11 | continue 12 | lr = cfg.SOLVER.BASE_LR 13 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 14 | if "bias" in key: 15 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 16 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 17 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 18 | 19 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM) 20 | return optimizer 21 | 22 | 23 | def make_lr_scheduler(cfg, optimizer): 24 | return WarmupMultiStepLR( 25 | optimizer, 26 | cfg.SOLVER.STEPS, 27 | cfg.SOLVER.GAMMA, 28 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 29 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 30 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 31 | ) 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | 4 | import torch 5 | 6 | 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 8 | # separating MultiStepLR from WarmupLR 9 | # but the current LRScheduler design doesn't allow it 10 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer, 14 | milestones, 15 | gamma=0.1, 16 | warmup_factor=1.0 / 3, 17 | warmup_iters=500, 18 | warmup_method="linear", 19 | last_epoch=-1, 20 | ): 21 | if not list(milestones) == sorted(milestones): 22 | raise ValueError( 23 | "Milestones should be a list of increasing integers. " 24 | "Got {}".format(milestones) 25 | ) 26 | 27 | if warmup_method not in ("constant", "linear"): 28 | raise ValueError( 29 | "Only 'constant' or 'linear' warmup_method accepted, " 30 | "got {}".format(warmup_method) 31 | ) 32 | self.milestones = milestones 33 | self.gamma = gamma 34 | self.warmup_factor = warmup_factor 35 | self.warmup_iters = warmup_iters 36 | self.warmup_method = warmup_method 37 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 38 | 39 | def get_lr(self): 40 | warmup_factor = 1 41 | if self.last_epoch < self.warmup_iters: 42 | if self.warmup_method == "constant": 43 | warmup_factor = self.warmup_factor 44 | elif self.warmup_method == "linear": 45 | alpha = self.last_epoch / self.warmup_iters 46 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 47 | return [ 48 | base_lr 49 | * warmup_factor 50 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 51 | for base_lr in self.base_lrs 52 | ] 53 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/maskrcnn_benchmark/structures/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/boxlist_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
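To make the schedule implemented by WarmupMultiStepLR above concrete, the snippet below re-implements the multiplier computed in get_lr for a few sample iterations, using the constructor defaults (warmup_factor=1/3, warmup_iters=500, linear warmup, gamma=0.1) and hypothetical milestones at 60k and 80k iterations:

from bisect import bisect_right

def lr_multiplier(it, milestones=(60000, 80000), gamma=0.1,
                  warmup_factor=1.0 / 3, warmup_iters=500):
    factor = 1.0
    if it < warmup_iters:
        alpha = it / warmup_iters                     # linear ramp from 1/3 up to 1
        factor = warmup_factor * (1 - alpha) + alpha
    return factor * gamma ** bisect_right(milestones, it)

print(lr_multiplier(0))      # 0.333...  at the first iteration
print(lr_multiplier(250))    # 0.666...  halfway through warmup
print(lr_multiplier(10000))  # 1.0       after warmup
print(lr_multiplier(70000))  # 0.1       past the first milestone
print(lr_multiplier(85000))  # 0.01      past the second milestone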
2 | import torch 3 | 4 | from .bounding_box import BoxList 5 | 6 | from maskrcnn_benchmark.layers import nms as _box_nms 7 | 8 | 9 | def boxlist_nms(boxlist, nms_thresh, max_proposals=-1, score_field="score"): 10 | """ 11 | Performs non-maximum suppression on a boxlist, with scores specified 12 | in a boxlist field via score_field. 13 | 14 | Arguments: 15 | boxlist (BoxList) 16 | nms_thresh (float) 17 | max_proposals (int): if > 0, then only the top max_proposals are kept 18 | after non-maximum suppression 19 | score_field (str) 20 | """ 21 | if nms_thresh <= 0: 22 | return boxlist 23 | mode = boxlist.mode 24 | boxlist = boxlist.convert("xyxy") 25 | boxes = boxlist.bbox 26 | score = boxlist.get_field(score_field) 27 | keep = _box_nms(boxes, score, nms_thresh) 28 | if max_proposals > 0: 29 | keep = keep[: max_proposals] 30 | boxlist = boxlist[keep] 31 | return boxlist.convert(mode) 32 | 33 | 34 | def remove_small_boxes(boxlist, min_size): 35 | """ 36 | Only keep boxes with both sides >= min_size 37 | 38 | Arguments: 39 | boxlist (BoxList) 40 | min_size (int) 41 | """ 42 | # TODO maybe add an API for querying the ws / hs 43 | xywh_boxes = boxlist.convert("xywh").bbox 44 | _, _, ws, hs = xywh_boxes.unbind(dim=1) 45 | keep = ( 46 | (ws >= min_size) & (hs >= min_size) 47 | ).nonzero().squeeze(1) 48 | return boxlist[keep] 49 | 50 | 51 | # implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py 52 | # with slight modifications 53 | def boxlist_iou(boxlist1, boxlist2): 54 | """Compute the intersection over union of two sets of boxes. 55 | The box order must be (xmin, ymin, xmax, ymax). 56 | 57 | Arguments: 58 | boxlist1 (BoxList): bounding boxes, sized [N,4]. 59 | boxlist2 (BoxList): bounding boxes, sized [M,4]. 60 | 61 | Returns: 62 | (tensor) iou, sized [N,M].
63 | 64 | Reference: 65 | https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py 66 | """ 67 | if boxlist1.size != boxlist2.size: 68 | raise RuntimeError( 69 | "boxlists should have same image size, got {}, {}".format(boxlist1, boxlist2)) 70 | 71 | N = len(boxlist1) 72 | M = len(boxlist2) 73 | 74 | area1 = boxlist1.area() 75 | area2 = boxlist2.area() 76 | 77 | box1, box2 = boxlist1.bbox, boxlist2.bbox 78 | 79 | lt = torch.max(box1[:, None, :2], box2[:, :2]) # [N,M,2] 80 | rb = torch.min(box1[:, None, 2:], box2[:, 2:]) # [N,M,2] 81 | 82 | TO_REMOVE = 1 83 | 84 | wh = (rb - lt + TO_REMOVE).clamp(min=0) # [N,M,2] 85 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 86 | 87 | iou = inter / (area1[:, None] + area2 - inter) 88 | return iou 89 | 90 | 91 | # TODO redundant, remove 92 | def _cat(tensors, dim=0): 93 | """ 94 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 95 | """ 96 | assert isinstance(tensors, (list, tuple)) 97 | if len(tensors) == 1: 98 | return tensors[0] 99 | return torch.cat(tensors, dim) 100 | 101 | 102 | def cat_boxlist(bboxes): 103 | """ 104 | Concatenates a list of BoxList (having the same image size) into a 105 | single BoxList 106 | 107 | Arguments: 108 | bboxes (list[BoxList]) 109 | """ 110 | assert isinstance(bboxes, (list, tuple)) 111 | assert all(isinstance(bbox, BoxList) for bbox in bboxes) 112 | 113 | size = bboxes[0].size 114 | assert all(bbox.size == size for bbox in bboxes) 115 | 116 | mode = bboxes[0].mode 117 | assert all(bbox.mode == mode for bbox in bboxes) 118 | 119 | fields = set(bboxes[0].fields()) 120 | assert all(set(bbox.fields()) == fields for bbox in bboxes) 121 | 122 | cat_boxes = BoxList(_cat([bbox.bbox for bbox in bboxes], dim=0), size, mode) 123 | 124 | for field in fields: 125 | data = _cat([bbox.get_field(field) for bbox in bboxes], dim=0) 126 | cat_boxes.add_field(field, data) 127 | 128 | return cat_boxes 129 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 
33 | When tensors is an iterable of Tensors, it pads 34 | the Tensors with zeros so that they have the same 35 | shape 36 | """ 37 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 38 | tensors = [tensors] 39 | 40 | if isinstance(tensors, ImageList): 41 | return tensors 42 | elif isinstance(tensors, torch.Tensor): 43 | # single tensor shape can be inferred 44 | assert tensors.dim() == 4 45 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 46 | return ImageList(tensors, image_sizes) 47 | elif isinstance(tensors, (tuple, list)): 48 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 49 | 50 | # TODO Ideally, just remove this and let the model handle arbitrary 51 | # input sizes 52 | if size_divisible > 0: 53 | import math 54 | 55 | stride = size_divisible 56 | max_size = list(max_size) 57 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 58 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 59 | max_size = tuple(max_size) 60 | 61 | batch_shape = (len(tensors),) + max_size 62 | batched_imgs = tensors[0].new(*batch_shape).zero_() 63 | for img, pad_img in zip(tensors, batched_imgs): 64 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 65 | 66 | image_sizes = [im.shape[-2:] for im in tensors] 67 | 68 | return ImageList(batched_imgs, image_sizes) 69 | else: 70 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 71 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/maskrcnn_benchmark/utils/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import PIL 3 | 4 | from torch.utils.collect_env import get_pretty_env_info 5 | 6 | 7 | def get_pil_version(): 8 | return "\n        Pillow ({})".format(PIL.__version__) 9 | 10 | 11 | def collect_env_info(): 12 | env_str = get_pretty_env_info() 13 | env_str += get_pil_version() 14 | return env_str 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | 4 | from maskrcnn_benchmark.utils.imports import import_file 5 | 6 | 7 | def setup_environment(): 8 | """Perform environment setup work. The default setup is a no-op, but this 9 | function allows the user to specify a Python source file that performs 10 | custom setup work that may be necessary for their computing environment.
11 | """ 12 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 13 | if custom_module_path: 14 | setup_custom_environment(custom_module_path) 15 | else: 16 | # The default setup is a no-op 17 | pass 18 | 19 | 20 | def setup_custom_environment(custom_module_path): 21 | """Load custom environment setup from a Python source file and run the setup 22 | function. 23 | """ 24 | module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) 25 | assert hasattr(module, "setup_environment") and callable( 26 | module.setup_environment 27 | ), ( 28 | "Custom environment module defined in {} does not have the " 29 | "required callable attribute 'setup_environment'." 30 | ).format( 31 | custom_module_path 32 | ) 33 | module.setup_environment() 34 | 35 | 36 | # Force environment setup when this module is imported 37 | setup_environment() 38 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/imports.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | if torch._six.PY3: 5 | import importlib 6 | import importlib.util 7 | import sys 8 | 9 | 10 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 11 | def import_file(module_name, file_path, make_importable=False): 12 | spec = importlib.util.spec_from_file_location(module_name, file_path) 13 | module = importlib.util.module_from_spec(spec) 14 | spec.loader.exec_module(module) 15 | if make_importable: 16 | sys.modules[module_name] = module 17 | return module 18 | else: 19 | import imp 20 | 21 | def import_file(module_name, file_path, make_importable=None): 22 | module = imp.load_source(module_name, file_path) 23 | return module 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | 7 | def setup_logger(name, save_dir, distributed_rank): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | ch = logging.StreamHandler(stream=sys.stdout) 14 | ch.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 16 | ch.setFormatter(formatter) 17 | logger.addHandler(ch) 18 | 19 | if save_dir: 20 | fh = logging.FileHandler(os.path.join(save_dir, "log.txt")) 21 | fh.setLevel(logging.DEBUG) 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | 25 | return logger 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | 7 | 8 | class SmoothedValue(object): 9 | """Track a series of values and provide access to smoothed values over a 10 | window or the global series average. 
11 | """ 12 | 13 | def __init__(self, window_size=20): 14 | self.deque = deque(maxlen=window_size) 15 | self.series = [] 16 | self.total = 0.0 17 | self.count = 0 18 | 19 | def update(self, value): 20 | self.deque.append(value) 21 | self.series.append(value) 22 | self.count += 1 23 | self.total += value 24 | 25 | @property 26 | def median(self): 27 | d = torch.tensor(list(self.deque)) 28 | return d.median().item() 29 | 30 | @property 31 | def avg(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.mean().item() 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger(object): 41 | def __init__(self, delimiter="\t"): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | return object.__getattribute__(self, attr) 56 | 57 | def __str__(self): 58 | loss_str = [] 59 | for name, meter in self.meters.items(): 60 | loss_str.append( 61 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 62 | ) 63 | return self.delimiter.join(loss_str) 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import os 4 | 5 | 6 | def mkdir(path): 7 | try: 8 | os.makedirs(path) 9 | except OSError as e: 10 | if e.errno != errno.EEXIST: 11 | raise 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_serialization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | import logging 4 | 5 | import torch 6 | 7 | from maskrcnn_benchmark.utils.imports import import_file 8 | 9 | 10 | def align_and_update_state_dicts(model_state_dict, loaded_state_dict): 11 | """ 12 | Strategy: suppose that the models that we will create will have prefixes appended 13 | to each of their keys, for example due to an extra level of nesting that the original 14 | pre-trained weights from ImageNet won't contain. For example, model.state_dict() 15 | might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains 16 | res2.conv1.weight. We thus want to match both parameters together. 17 | For that, for each model weight we look among all the loaded keys for one 18 | that is a suffix of the current weight name, and use it if that's the case. 19 | If multiple matches exist, take the one with the longest 20 | matching name. For example, for the same model as before, the pretrained 21 | weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, 22 | we want to match backbone[0].body.conv1.weight to conv1.weight, and 23 | backbone[0].body.res2.conv1.weight to res2.conv1.weight.
24 | """ 25 | current_keys = sorted(list(model_state_dict.keys())) 26 | loaded_keys = sorted(list(loaded_state_dict.keys())) 27 | # get a matrix of string matches, where each (i, j) entry corresponds to the size of the 28 | # loaded_key string, if it matches 29 | match_matrix = [ 30 | len(j) if i.endswith(j) else 0 for i in current_keys for j in loaded_keys 31 | ] 32 | match_matrix = torch.as_tensor(match_matrix).view( 33 | len(current_keys), len(loaded_keys) 34 | ) 35 | max_match_size, idxs = match_matrix.max(1) 36 | # remove indices that correspond to no-match 37 | idxs[max_match_size == 0] = -1 38 | 39 | # used for logging 40 | max_size = max([len(key) for key in current_keys]) if current_keys else 1 41 | max_size_loaded = max([len(key) for key in loaded_keys]) if loaded_keys else 1 42 | log_str_template = "{: <{}} loaded from {: <{}} of shape {}" 43 | logger = logging.getLogger(__name__) 44 | for idx_new, idx_old in enumerate(idxs.tolist()): 45 | if idx_old == -1: 46 | continue 47 | key = current_keys[idx_new] 48 | key_old = loaded_keys[idx_old] 49 | model_state_dict[key] = loaded_state_dict[key_old] 50 | logger.info( 51 | log_str_template.format( 52 | key, 53 | max_size, 54 | key_old, 55 | max_size_loaded, 56 | tuple(loaded_state_dict[key_old].shape), 57 | ) 58 | ) 59 | 60 | 61 | def strip_prefix_if_present(state_dict, prefix): 62 | keys = sorted(state_dict.keys()) 63 | if not all(key.startswith(prefix) for key in keys): 64 | return state_dict 65 | stripped_state_dict = OrderedDict() 66 | for key, value in state_dict.items(): 67 | stripped_state_dict[key.replace(prefix, "")] = value 68 | return stripped_state_dict 69 | 70 | 71 | def load_state_dict(model, loaded_state_dict): 72 | model_state_dict = model.state_dict() 73 | # if the state_dict comes from a model that was wrapped in a 74 | # DataParallel or DistributedDataParallel during serialization, 75 | # remove the "module" prefix before performing the matching 76 | loaded_state_dict = strip_prefix_if_present(loaded_state_dict, prefix="module.") 77 | align_and_update_state_dicts(model_state_dict, loaded_state_dict) 78 | 79 | # use strict loading 80 | model.load_state_dict(model_state_dict) 81 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | import sys 4 | 5 | from torch.utils.model_zoo import _download_url_to_file 6 | from torch.utils.model_zoo import urlparse 7 | from torch.utils.model_zoo import HASH_REGEX 8 | 9 | from maskrcnn_benchmark.utils.comm import is_main_process 10 | from maskrcnn_benchmark.utils.comm import synchronize 11 | 12 | 13 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py 14 | # but with a few improvements and modifications 15 | def cache_url(url, model_dir=None, progress=True): 16 | r"""Downloads and caches the Torch serialized object at the given URL. 17 | If the object is already present in `model_dir`, the path to the cached 18 | file is returned without re-downloading. The filename part of the URL should follow the naming convention 19 | ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more 20 | digits of the SHA256 hash of the contents of the file. The hash is used to 21 | ensure unique names and to verify the contents of the file. 22 | The default value of `model_dir` is ``$TORCH_HOME/models`` where 23 | ``$TORCH_HOME`` defaults to ``~/.torch``.
The default directory can be 24 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 25 | Args: 26 | url (string): URL of the object to download 27 | model_dir (string, optional): directory in which to save the object 28 | progress (bool, optional): whether or not to display a progress bar to stderr 29 | Example: 30 | >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') 31 | """ 32 | if model_dir is None: 33 | torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch')) 34 | model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models')) 35 | if not os.path.exists(model_dir): 36 | os.makedirs(model_dir) 37 | parts = urlparse(url) 38 | filename = os.path.basename(parts.path) 39 | if filename == "model_final.pkl": 40 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename 41 | # so make the full path the filename by replacing / with _ 42 | filename = parts.path.replace("/", "_") 43 | cached_file = os.path.join(model_dir, filename) 44 | if not os.path.exists(cached_file) and is_main_process(): 45 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 46 | hash_prefix = HASH_REGEX.search(filename) 47 | if hash_prefix is not None: 48 | hash_prefix = hash_prefix.group(1) 49 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, 50 | # which matches the hash PyTorch uses. So we skip the hash matching 51 | # if the hash_prefix is less than 6 characters 52 | if len(hash_prefix) < 6: 53 | hash_prefix = None 54 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 55 | synchronize() 56 | return cached_file 57 | -------------------------------------------------------------------------------- /run_test_R-50.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | MODEL=$1 3 | 4 | for ITER in 0089999 0080001 0070001 0060001 0050001 0040001 0030001 0020001 0010001 5 | do 6 | python tools/test_net.py --config-file ./configs/retina/${MODEL}.yaml MODEL.WEIGHT ./models/${MODEL}/model_${ITER}.pth OUTPUT_DIR ./models/${MODEL}/${ITER} TEST.IMS_PER_BATCH 4 7 | done 8 | 9 | #for ITER in 89999 10 | #do 11 | # python tools/test_net.py --config-file ./configs/retina/retinanet_R-50-FPN_1x.yaml MODEL.WEIGHT ./models/retinanet_R-50-FPN_1x_1101/model_00${ITER}.pth OUTPUT_DIR ./models/retinanet_R-50-FPN_1x_1101/${ITER} TEST.IMS_PER_BATCH 1 12 | #done 13 | -------------------------------------------------------------------------------- /run_test_R-50_dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | MODEL=$1 3 | 4 | for ITER in 0089999 0080001 0070001 0060001 0050001 0040001 0030001 0020001 0010001 5 | do 6 | python -m torch.distributed.launch --nproc_per_node=$NGPUS tools/test_net.py --config-file ./configs/retina/${MODEL}.yaml MODEL.WEIGHT ./models/${MODEL}/model_${ITER}.pth OUTPUT_DIR ./models/${MODEL}/${ITER} TEST.IMS_PER_BATCH 4 7 | done 8 | 9 | #for ITER in 89999 10 | #do 11 | # python tools/test_net.py --config-file ./configs/retina/retinanet_R-50-FPN_1x.yaml MODEL.WEIGHT ./models/retinanet_R-50-FPN_1x_1101/model_00${ITER}.pth OUTPUT_DIR ./models/retinanet_R-50-FPN_1x_1101/${ITER} TEST.IMS_PER_BATCH 1 12 | #done 13 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 
Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if torch.cuda.is_available() and CUDA_HOME is not None: 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "maskrcnn_benchmark._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="maskrcnn_benchmark", 61 | version="0.1", 62 | author="fmassa", 63 | url="https://github.com/facebookresearch/maskrcnn-benchmark", 64 | description="object detection in pytorch", 65 | packages=find_packages(exclude=("configs", "tests",)), 66 | # install_requires=requirements, 67 | ext_modules=get_extensions(), 68 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 69 | ) 70 | -------------------------------------------------------------------------------- /tools/parse_log.py: -------------------------------------------------------------------------------- 1 | import re 2 | import argparse 3 | import numpy as np 4 | 5 | def parse(log_path): 6 | with open(log_path) as f: 7 | text = f.read() 8 | 9 | float_pattern = r'\d+\.\d+' 10 | mean_pattern = r'AdjustSmoothL1\(mean\): ({}), ({}), ({}), ({})'.format( 11 | float_pattern, float_pattern, float_pattern, float_pattern) 12 | var_pattern = r'AdjustSmoothL1\(var\): ({}), ({}), ({}), ({})'.format( 13 | float_pattern, float_pattern, float_pattern, float_pattern) 14 | pattern = mean_pattern + r'.*\n.*' + var_pattern + r'.*\n.*' + \ 15 | r'iter: (\d+) ' + \ 16 | r'loss: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ 17 | r'loss_retina_cls: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ 18 | r'loss_retina_reg: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ 19 | r'loss_mask: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ 20 | r'time: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ 21 | r'data: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ 22 | r'lr: ({}) '.format(float_pattern) + \ 23 | r'max mem: (\d+)' 24 | reg_exp = re.compile(pattern) 25 | 26 | headers = ['smooth_l1_mean', 'smooth_l1_var', 'iter', 'loss', 27 | 'loss_retina_cls', 'loss_retina_reg', 'loss_mask', 28 | 'time', 'data', 'lr', 'max_mem'] 29 | 30 |
iterations = list() 31 | means = list() 32 | variations = list() 33 | running_losses = list() 34 | for args in reg_exp.findall(text): 35 | mean = [float(v) for v in args[0:4]] 36 | var = [float(v) for v in args[4:8]] 37 | iteration = int(args[8]) 38 | point_loss = float(args[9]) 39 | running_loss = float(args[10]) 40 | point_loss_retina_cls = float(args[11]) 41 | running_loss_retina_cls = float(args[12]) 42 | point_loss_retina_reg = float(args[13]) 43 | running_loss_retina_reg = float(args[14]) 44 | point_loss_mask = float(args[15]) 45 | running_loss_mask = float(args[16]) 46 | point_time = float(args[17]) 47 | running_time = float(args[18]) 48 | point_data = float(args[19]) 49 | running_data = float(args[20]) 50 | lr = float(args[21]) 51 | max_mem = int(args[22]) 52 | 53 | iterations.append(iteration) 54 | means.append(mean) 55 | variations.append(var) 56 | running_losses.append(running_loss) 57 | 58 | iterations = np.asarray(iterations) 59 | means = np.asarray(means) 60 | variations = np.asarray(variations) 61 | running_losses = np.asarray(running_losses) 62 | print(iterations) 63 | print(means) 64 | 65 | 66 | if __name__ == "__main__": 67 | parser = argparse.ArgumentParser(description='Parse log file') 68 | parser.add_argument('log_path', metavar='P', help='path to the log file') 69 | args = parser.parse_args() 70 | 71 | parse(args.log_path) 72 | 73 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Set up custom environment before nearly anything else is imported 3 | # NOTE: this should be the first import (do not reorder) 4 | from maskrcnn_benchmark.utils.env import setup_environment  # noqa F401 isort:skip 5 | 6 | import argparse 7 | import os 8 | 9 | import torch 10 | from maskrcnn_benchmark.config import cfg 11 | from maskrcnn_benchmark.data import make_data_loader 12 | from maskrcnn_benchmark.engine.inference import inference 13 | from maskrcnn_benchmark.modeling.detector import build_detection_model 14 | from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer 15 | from maskrcnn_benchmark.utils.collect_env import collect_env_info 16 | from maskrcnn_benchmark.utils.comm import synchronize, get_rank 17 | from maskrcnn_benchmark.utils.logger import setup_logger 18 | from maskrcnn_benchmark.utils.miscellaneous import mkdir 19 | 20 | 21 | def main(): 22 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference") 23 | parser.add_argument( 24 | "--config-file", 25 | default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml", 26 | metavar="FILE", 27 | help="path to config file", 28 | ) 29 | parser.add_argument("--local_rank", type=int, default=0) 30 | parser.add_argument( 31 | "opts", 32 | help="Modify config options using the command-line", 33 | default=None, 34 | nargs=argparse.REMAINDER, 35 | ) 36 | 37 | args = parser.parse_args() 38 | 39 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 40 | distributed = num_gpus > 1 41 | 42 | if distributed: 43 | torch.cuda.set_device(args.local_rank) 44 | torch.distributed.deprecated.init_process_group( 45 | backend="nccl", init_method="env://" 46 | ) 47 | 48 | cfg.merge_from_file(args.config_file) 49 | cfg.merge_from_list(args.opts) 50 | cfg.freeze() 51 | 52 | save_dir = "" 53 | logger = setup_logger("maskrcnn_benchmark",
save_dir, get_rank()) 54 | logger.info("Using {} GPUs".format(num_gpus)) 55 | logger.info(cfg) 56 | 57 | logger.info("Collecting env info (might take some time)") 58 | logger.info("\n" + collect_env_info()) 59 | 60 | model = build_detection_model(cfg) 61 | model.to(cfg.MODEL.DEVICE) 62 | 63 | checkpointer = DetectronCheckpointer(cfg, model) 64 | _ = checkpointer.load(cfg.MODEL.WEIGHT) 65 | 66 | iou_types = ("bbox",) 67 | if cfg.MODEL.MASK_ON: 68 | iou_types = iou_types + ("segm",) 69 | output_folders = [None] * len(cfg.DATASETS.TEST) 70 | if cfg.OUTPUT_DIR: 71 | dataset_names = cfg.DATASETS.TEST 72 | for idx, dataset_name in enumerate(dataset_names): 73 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) 74 | mkdir(output_folder) 75 | output_folders[idx] = output_folder 76 | data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed) 77 | for output_folder, data_loader_val in zip(output_folders, data_loaders_val): 78 | inference( 79 | model, 80 | data_loader_val, 81 | iou_types=iou_types, 82 | #box_only=cfg.MODEL.RPN_ONLY, 83 | box_only=False if cfg.RETINANET.RETINANET_ON else cfg.MODEL.RPN_ONLY, 84 | device=cfg.MODEL.DEVICE, 85 | expected_results=cfg.TEST.EXPECTED_RESULTS, 86 | expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, 87 | output_folder=output_folder, 88 | ) 89 | synchronize() 90 | 91 | 92 | if __name__ == "__main__": 93 | main() 94 | --------------------------------------------------------------------------------
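For reference, tools/test_net.py above forwards the trailing command-line tokens (`opts`) to cfg.merge_from_list; this is how the run_test_*.sh scripts override MODEL.WEIGHT, OUTPUT_DIR and TEST.IMS_PER_BATCH without editing any YAML. A minimal sketch of that mechanism, assuming the yacs library this config system is built on and a made-up two-field config:

from yacs.config import CfgNode as CN

cfg = CN()
cfg.MODEL = CN()
cfg.MODEL.WEIGHT = ""
cfg.TEST = CN()
cfg.TEST.IMS_PER_BATCH = 8

# key/value pairs exactly as they appear after the script name on the CLI
cfg.merge_from_list(["MODEL.WEIGHT", "./models/model_0089999.pth",
                     "TEST.IMS_PER_BATCH", "4"])
cfg.freeze()                   # later mutation attempts now raise an error
print(cfg.TEST.IMS_PER_BATCH)  # -> 4 (yacs coerces the string to the field's type)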