├── .flake8
├── .github
│   └── ISSUE_TEMPLATE
│       ├── bug-report.md
│       ├── feature-request.md
│       └── questions-help-support.md
├── .gitignore
├── ABSTRACTIONS.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── INSTALL.md
├── LICENSE
├── MODEL_ZOO.md
├── OLD_README.md
├── README.md
├── TROUBLESHOOTING.md
├── arch.png
├── configs
│   ├── caffe2
│   │   ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml
│   │   ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml
│   │   ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml
│   │   ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
│   │   ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml
│   │   ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml
│   │   ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml
│   │   └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
│   ├── e2e_faster_rcnn_R_101_FPN_1x.yaml
│   ├── e2e_faster_rcnn_R_50_C4_1x.yaml
│   ├── e2e_faster_rcnn_R_50_FPN_1x.yaml
│   ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml
│   ├── e2e_mask_rcnn_R_101_FPN_1x.yaml
│   ├── e2e_mask_rcnn_R_50_C4_1x.yaml
│   ├── e2e_mask_rcnn_R_50_FPN_1x.yaml
│   ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml
│   ├── quick_schedules
│   │   ├── e2e_faster_rcnn_R_50_C4_quick.yaml
│   │   ├── e2e_faster_rcnn_R_50_FPN_quick.yaml
│   │   ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml
│   │   ├── e2e_mask_rcnn_R_50_C4_quick.yaml
│   │   ├── e2e_mask_rcnn_R_50_FPN_quick.yaml
│   │   ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml
│   │   ├── rpn_R_50_C4_quick.yaml
│   │   └── rpn_R_50_FPN_quick.yaml
│   └── retina
│       ├── retinanet_R-101-FPN_1x.yaml
│       ├── retinanet_R-50-FPN_1x.yaml
│       ├── retinanet_R-50-FPN_1x_adjust_std011.yaml
│       ├── retinanet_R-50-FPN_1x_adjust_std100.yaml
│       ├── retinanet_R-50-FPN_1x_adjustl1.yaml
│       ├── retinanet_R-50-FPN_1x_beta100.yaml
│       ├── retinanet_R-50-FPN_1x_low_quality_0.2.yaml
│       ├── retinanet_R-50-FPN_1x_low_quality_0.3.yaml
│       ├── retinanet_R-50-FPN_1x_low_quality_0.4.yaml
│       ├── retinanet_R-50-FPN_1x_no_low_quality.yaml
│       ├── retinanet_R-50-FPN_1x_no_low_quality_adjustl1.yaml
│       ├── retinanet_mask_R-101-FPN_1.5x_adjust_std011_400.yaml
│       ├── retinanet_mask_R-101-FPN_1.5x_adjust_std011_500.yaml
│       ├── retinanet_mask_R-101-FPN_1.5x_adjust_std011_600.yaml
│       ├── retinanet_mask_R-101-FPN_1.5x_adjust_std011_700.yaml
│       ├── retinanet_mask_R-101-FPN_1.5x_adjust_std011_800.yaml
│       ├── retinanet_mask_R-101-FPN_2x_adjust_std011_ms.yaml
│       ├── retinanet_mask_R-101-FPN_2x_adjust_std011_ms_gn.yaml
│       ├── retinanet_mask_R-50-FPN_1.5x.yaml
│       ├── retinanet_mask_R-50-FPN_1.5x_adjust_std011_400.yaml
│       ├── retinanet_mask_R-50-FPN_1.5x_adjust_std011_500.yaml
│       ├── retinanet_mask_R-50-FPN_1.5x_adjust_std011_600.yaml
│       ├── retinanet_mask_R-50-FPN_1.5x_adjust_std011_800.yaml
│       ├── retinanet_mask_R-50-FPN_1x.yaml
│       ├── retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml
│       ├── retinanet_mask_R-50-FPN_canonical5_1.5x.yaml
│       ├── retinanet_mask_R-50-FPN_canonical5_1x.yaml
│       ├── retinanet_mask_X-101-32x8d_FPN_2x_adjust_std011_ms_gn.yaml
│       └── retinanet_mask_p2p7_R-50-FPN_1x.yaml
├── demo
│   ├── Mask_R-CNN_demo.ipynb
│   ├── README.md
│   ├── demo_e2e_mask_rcnn_R_50_FPN_1x.png
│   ├── demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png
│   ├── predictor.py
│   └── webcam.py
├── maskrcnn_benchmark
│   ├── __init__.py
│   ├── config
│   │   ├── __init__.py
│   │   ├── defaults.py
│   │   └── paths_catalog.py
│   ├── csrc
│   │   ├── ROIAlign.h
│   │   ├── ROIPool.h
│   │   ├── SigmoidFocalLoss.h
│   │   ├── cpu
│   │   │   ├── ROIAlign_cpu.cpp
│   │   │   ├── nms_cpu.cpp
│   │   │   └── vision.h
│   │   ├── cuda
│   │   │   ├── ROIAlign_cuda.cu
│   │   │   ├── ROIPool_cuda.cu
│   │   │   ├── SigmoidFocalLoss_cuda.cu
│   │   │   ├── nms.cu
│   │   │   └── vision.h
│   │   ├── nms.h
│   │   └── vision.cpp
│   ├── data
│   │   ├── __init__.py
│   │   ├── build.py
│   │   ├── collate_batch.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── coco.py
│   │   │   ├── concat_dataset.py
│   │   │   └── list_dataset.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   ├── distributed.py
│   │   │   ├── grouped_batch_sampler.py
│   │   │   └── iteration_based_batch_sampler.py
│   │   └── transforms
│   │       ├── __init__.py
│   │       ├── build.py
│   │       └── transforms.py
│   ├── engine
│   │   ├── __init__.py
│   │   ├── inference.py
│   │   └── trainer.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── _utils.py
│   │   ├── adjust_smooth_l1_loss.py
│   │   ├── batch_norm.py
│   │   ├── misc.py
│   │   ├── nms.py
│   │   ├── roi_align.py
│   │   ├── roi_pool.py
│   │   ├── sigmoid_focal_loss.py
│   │   └── smooth_l1_loss.py
│   ├── modeling
│   │   ├── __init__.py
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── backbone.py
│   │   │   ├── fpn.py
│   │   │   └── resnet.py
│   │   ├── balanced_positive_negative_sampler.py
│   │   ├── box_coder.py
│   │   ├── detector
│   │   │   ├── __init__.py
│   │   │   ├── detectors.py
│   │   │   ├── generalized_rcnn.py
│   │   │   └── retinanet.py
│   │   ├── matcher.py
│   │   ├── poolers.py
│   │   ├── roi_heads
│   │   │   ├── __init__.py
│   │   │   ├── box_head
│   │   │   │   ├── __init__.py
│   │   │   │   ├── box_head.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── loss.py
│   │   │   │   ├── roi_box_feature_extractors.py
│   │   │   │   └── roi_box_predictors.py
│   │   │   ├── mask_head
│   │   │   │   ├── __init__.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── loss.py
│   │   │   │   ├── mask_head.py
│   │   │   │   ├── roi_mask_feature_extractors.py
│   │   │   │   └── roi_mask_predictors.py
│   │   │   └── roi_heads.py
│   │   ├── rpn
│   │   │   ├── __init__.py
│   │   │   ├── anchor_generator.py
│   │   │   ├── inference.py
│   │   │   ├── loss.py
│   │   │   ├── retinanet.py
│   │   │   ├── retinanet_detail_infer.py
│   │   │   ├── retinanet_infer.py
│   │   │   ├── retinanet_loss.py
│   │   │   └── rpn.py
│   │   └── utils.py
│   ├── solver
│   │   ├── __init__.py
│   │   ├── build.py
│   │   └── lr_scheduler.py
│   ├── structures
│   │   ├── __init__.py
│   │   ├── bounding_box.py
│   │   ├── boxlist_ops.py
│   │   ├── image_list.py
│   │   └── segmentation_mask.py
│   └── utils
│       ├── README.md
│       ├── __init__.py
│       ├── c2_model_loading.py
│       ├── checkpoint.py
│       ├── collect_env.py
│       ├── comm.py
│       ├── env.py
│       ├── imports.py
│       ├── logger.py
│       ├── metric_logger.py
│       ├── miscellaneous.py
│       ├── model_serialization.py
│       └── model_zoo.py
├── run_test_R-50.sh
├── run_test_R-50_dist.sh
├── setup.py
├── tests
│   ├── checkpoint.py
│   └── test_data_samplers.py
└── tools
    ├── parse_log.py
    ├── test_net.py
    └── train_net.py

/.flake8:
--------------------------------------------------------------------------------
# This is an example .flake8 config, used when developing *Black* itself.
# Keep in sync with setup.cfg which is used for source packages.

[flake8]
ignore = E203, E266, E501, W503
max-line-length = 80
max-complexity = 18
select = B,C,E,F,W,T4,B9
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.md:
--------------------------------------------------------------------------------
---
name: "\U0001F41B Bug Report"
about: Submit a bug report to help us improve Mask R-CNN Benchmark

---

## 🐛 Bug


## To Reproduce

Steps to reproduce the behavior:

1.
1.
1.


## Expected behavior


## Environment

Please copy and paste the output from the
[environment collection script from PyTorch](https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py)
(or fill out the checklist below manually).

You can get the script and run it with:
```
wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py
# For security purposes, please check the contents of collect_env.py before running it.
python collect_env.py
```

- PyTorch Version (e.g., 1.0):
- OS (e.g., Linux):
- How you installed PyTorch (`conda`, `pip`, source):
- Build command you used (if compiling from source):
- Python version:
- CUDA/cuDNN version:
- GPU models and configuration:
- Any other relevant information:

## Additional context

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
---
name: "\U0001F680Feature Request"
about: Submit a proposal/request for a new Mask R-CNN Benchmark feature

---

## 🚀 Feature


## Motivation


## Pitch


## Alternatives


## Additional context

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/questions-help-support.md:
--------------------------------------------------------------------------------
---
name: "❓Questions/Help/Support"
about: Do you need support?

---

## ❓ Questions and Help
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# compilation and distribution
__pycache__
_ext
*.pyc
*.so
maskrcnn_benchmark.egg-info/
build/
dist/

# pytorch/python/numpy formats
*.pth
*.pkl
*.npy

# ipython/jupyter notebooks
*.ipynb
**/.ipynb_checkpoints/

# Editor temporaries
*.swn
*.swo
*.swp
*~

# Pycharm editor settings
.idea

# project dirs
/datasets
/models
--------------------------------------------------------------------------------
/ABSTRACTIONS.md:
--------------------------------------------------------------------------------
## Abstractions
The main abstractions introduced by `maskrcnn_benchmark` that are useful to
have in mind are the following:

### ImageList
In PyTorch, the first dimension of the input to the network generally represents
the batch dimension, and thus all elements of the same batch have the same
height / width.
In order to support images with different sizes and aspect ratios in the same
batch, we created the `ImageList` class, which internally holds a batch of
images (of possibly different sizes). The images are padded with zeros such that
they have the same final size and are batched over the first dimension. The original
sizes of the images before padding are stored in the `image_sizes` attribute,
and the batched tensor in `tensors`.
We provide a convenience function `to_image_list` that accepts a few different
input types, including a list of tensors, and returns an `ImageList` object.

```python
import torch

from maskrcnn_benchmark.structures.image_list import to_image_list

images = [torch.rand(3, 100, 200), torch.rand(3, 150, 170)]
batched_images = to_image_list(images)

# it is also possible to make the final batched image be a multiple of a number
batched_images_32 = to_image_list(images, size_divisible=32)
```

### BoxList
The `BoxList` class holds a set of bounding boxes (represented as an `Nx4` tensor) for
a specific image, as well as the size of the image as a `(width, height)` tuple.
It also contains a set of methods for performing geometric
transformations on the bounding boxes (such as cropping, scaling and flipping).
The class accepts bounding boxes in two different input formats:
- `xyxy`, where each box is encoded by its `x1`, `y1`, `x2` and `y2` coordinates, and
- `xywh`, where each box is encoded by its `x1`, `y1`, `w` and `h`.

Additionally, each `BoxList` instance can also hold arbitrary additional information
for each bounding box, such as labels, visibility, probability scores etc.

Here is an example of how to create a `BoxList` from a list of coordinates:
```python
import torch

from maskrcnn_benchmark.structures.bounding_box import BoxList, FLIP_LEFT_RIGHT

width = 100
height = 200
boxes = [
  [0, 10, 50, 50],
  [50, 20, 90, 60],
  [10, 10, 50, 50]
]
# create a BoxList with 3 boxes
bbox = BoxList(boxes, image_size=(width, height), mode='xyxy')

# perform some box transformations; the API is similar to PIL.Image
bbox_scaled = bbox.resize((width * 2, height * 3))
bbox_flipped = bbox.transpose(FLIP_LEFT_RIGHT)

# add labels for each bbox
labels = torch.tensor([0, 10, 1])
bbox.add_field('labels', labels)

# bbox also supports a few operations, like indexing
# here, selects boxes 0 and 2
bbox_subset = bbox[[0, 2]]
```
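
Fields added with `add_field` travel with the boxes through indexing, and boxes can
be converted between the two encodings. A minimal sketch, assuming the `get_field`
accessor and the `convert` method from
`maskrcnn_benchmark/structures/bounding_box.py` behave as described above:
```python
import torch

from maskrcnn_benchmark.structures.bounding_box import BoxList

boxes = torch.tensor([[0., 10., 50., 50.], [50., 20., 90., 60.]])
bbox = BoxList(boxes, image_size=(100, 200), mode='xyxy')
bbox.add_field('labels', torch.tensor([1, 2]))

# indexing keeps the matching fields: the subset still carries its own label
subset = bbox[[1]]
print(subset.get_field('labels'))

# switch between the two supported encodings
bbox_xywh = bbox.convert('xywh')
print(bbox_xywh.mode, bbox_xywh.bbox.shape)
```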
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Code of Conduct

Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
Please read the [full text](https://code.fb.com/codeofconduct/)
so that you can understand what actions will and will not be tolerated.
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing to Mask-RCNN Benchmark
We want to make contributing to this project as easy and transparent as
possible.

## Our Development Process
Minor changes and improvements will be released on an ongoing basis. Larger changes (e.g., changesets implementing a new paper) will be released on a more periodic basis.

## Pull Requests
We actively welcome your pull requests.

1. Fork the repo and create your branch from `master`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").

## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.

Complete your CLA here: <https://code.facebook.com/cla>

## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.

Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.

## Coding Style
* 4 spaces for indentation rather than tabs
* 80 character line length
* PEP8 formatting following [Black](https://black.readthedocs.io/en/stable/)

## License
By contributing to Mask-RCNN Benchmark, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.
--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
## Installation

### Requirements:
- PyTorch 1.0 from a nightly release. Installation instructions can be found in https://pytorch.org/get-started/locally/
- torchvision from master
- cocoapi
- yacs
- matplotlib
- GCC >= 4.9
- (optional) OpenCV for the webcam demo


### Step-by-step installation

```bash
# first, make sure that your conda is set up properly with the right environment
# for that, check that `which conda`, `which pip` and `which python` point to the
# right path. From a clean conda env, this is what you need to do

conda create --name maskrcnn_benchmark
source activate maskrcnn_benchmark

# this installs the right pip and dependencies for the fresh python
conda install ipython

# maskrcnn_benchmark and coco api dependencies
pip install ninja yacs cython matplotlib

# follow PyTorch installation in https://pytorch.org/get-started/locally/
# we give the instructions for CUDA 9.0
conda install pytorch-nightly -c pytorch

# install torchvision
cd ~/github
git clone https://github.com/pytorch/vision.git
cd vision
python setup.py install

# install pycocotools
cd ~/github
git clone https://github.com/cocodataset/cocoapi.git
cd cocoapi/PythonAPI
python setup.py build_ext install

# install PyTorch Detection
cd ~/github
git clone https://github.com/facebookresearch/maskrcnn-benchmark.git
cd maskrcnn-benchmark
# the following will install the lib with
# symbolic links, so that you can modify
# the files if you want and won't need to
# re-build it
python setup.py build develop

# or if you are on macOS
# MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop
```
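
After `python setup.py build develop` finishes, a quick sanity check (not part of
the official instructions) is to import the compiled extension;
`maskrcnn_benchmark._C` is the module name the build above is assumed to produce:
```bash
# should print the path of the compiled extension instead of raising ImportError
python -c "import maskrcnn_benchmark._C as C; print(C.__file__)"
```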
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Facebook

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/MODEL_ZOO.md:
--------------------------------------------------------------------------------
## Model Zoo and Baselines

### Hardware
- 8 NVIDIA V100 GPUs

### Software
- PyTorch version: 1.0.0a0+dd2c487
- CUDA 9.2
- CUDNN 7.1
- NCCL 2.2.13-1

### End-to-end Faster and Mask R-CNN baselines

All the baselines were trained using the exact same experimental setup as in Detectron.
We initialize the detection models with ImageNet weights from Caffe2, the same ones used by Detectron.

The pre-trained models are available via the links in the model id column.

backbone | type | lr sched | im / gpu | train mem (GB) | train time (s/iter) | total train time (hr) | inference time (s/im) | box AP | mask AP | model id
-- | -- | -- | -- | -- | -- | -- | -- | -- | -- | --
R-50-C4 | Fast | 1x | 1 | 5.8 | 0.4036 | 20.2 | 0.17130 | 34.8 | - | [6358800](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_50_C4_1x.pth)
R-50-FPN | Fast | 1x | 2 | 4.4 | 0.3530 | 8.8 | 0.12580 | 36.8 | - | [6358793](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_50_FPN_1x.pth)
R-101-FPN | Fast | 1x | 2 | 7.1 | 0.4591 | 11.5 | 0.143149 | 39.1 | - | [6358804](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_101_FPN_1x.pth)
X-101-32x8d-FPN | Fast | 1x | 1 | 7.6 | 0.7007 | 35.0 | 0.209965 | 41.2 | - | [6358717](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_X_101_32x8d_FPN_1x.pth)
R-50-C4 | Mask | 1x | 1 | 5.8 | 0.4520 | 22.6 | 0.17796 + 0.028 | 35.6 | 31.5 | [6358801](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_C4_1x.pth)
R-50-FPN | Mask | 1x | 2 | 5.2 | 0.4536 | 11.3 | 0.12966 + 0.034 | 37.8 | 34.2 | [6358792](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_FPN_1x.pth)
R-101-FPN | Mask | 1x | 2 | 7.9 | 0.5665 | 14.2 | 0.15384 + 0.034 | 40.1 | 36.1 | [6358805](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_101_FPN_1x.pth)
X-101-32x8d-FPN | Mask | 1x | 1 | 7.8 | 0.7562 | 37.8 | 0.21739 + 0.034 | 42.2 | 37.8 | [6358718](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_X_101_32x8d_FPN_1x.pth)


## Comparison with Detectron and mmdetection

In the following section, we compare our implementation with [Detectron](https://github.com/facebookresearch/Detectron)
and [mmdetection](https://github.com/open-mmlab/mmdetection).
The same remarks from [mmdetection](https://github.com/open-mmlab/mmdetection/blob/master/MODEL_ZOO.md#training-speed)
about different hardware apply here.

### Training speed

The numbers here are in seconds / iteration. The lower, the better.

type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 0.566 | - | 0.4036
Faster R-CNN R-50 FPN | 0.544 | 0.554 | 0.3530
Faster R-CNN R-101 FPN | 0.647 | - | 0.4591
Faster R-CNN X-101-32x8d FPN | 0.799 | - | 0.7007
Mask R-CNN R-50 C4 | 0.620 | - | 0.4520
Mask R-CNN R-50 FPN | 0.889 | 0.690 | 0.4536
Mask R-CNN R-101 FPN | 1.008 | - | 0.5665
Mask R-CNN X-101-32x8d FPN | 0.961 | - | 0.7562

### Training memory

The numbers here are in GB per GPU. The lower, the better.

type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 6.3 | - | 5.8
Faster R-CNN R-50 FPN | 7.2 | 4.9 | 4.4
Faster R-CNN R-101 FPN | 8.9 | - | 7.1
Faster R-CNN X-101-32x8d FPN | 7.0 | - | 7.6
Mask R-CNN R-50 C4 | 6.6 | - | 5.8
Mask R-CNN R-50 FPN | 8.6 | 5.9 | 5.2
Mask R-CNN R-101 FPN | 10.2 | - | 7.9
Mask R-CNN X-101-32x8d FPN | 7.7 | - | 7.8

### Accuracy

The numbers here are box AP (and mask AP, where applicable). The higher, the better.

type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 34.8 | - | 34.8
Faster R-CNN R-50 FPN | 36.7 | 36.7 | 36.8
Faster R-CNN R-101 FPN | 39.4 | - | 39.1
Faster R-CNN X-101-32x8d FPN | 41.3 | - | 41.2
Mask R-CNN R-50 C4 | 35.8 & 31.4 | - | 35.6 & 31.5
Mask R-CNN R-50 FPN | 37.7 & 33.9 | 37.5 & 34.4 | 37.8 & 34.2
Mask R-CNN R-101 FPN | 40.0 & 35.9 | - | 40.1 & 36.1
Mask R-CNN X-101-32x8d FPN | 42.1 & 37.3 | - | 42.2 & 37.8
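
The `https://download.pytorch.org/...` checkpoints linked above are fetched and
cached on first use. If you want to pre-download one yourself, a sketch using the
`cache_url` helper from `maskrcnn_benchmark/utils/model_zoo.py` (assuming it
mirrors the `torch.utils.model_zoo` download-and-cache semantics):
```python
from maskrcnn_benchmark.utils.model_zoo import cache_url

# downloads once into the local cache and returns the cached file path
path = cache_url("https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_FPN_1x.pth")
print(path)
```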
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# RetinaMask

The code is based on the [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark).

![alt text](arch.png)


### Citing RetinaMask
Please cite RetinaMask in your publications if it helps your research:

    @inproceedings{fu2019retinamask,
      title = {{RetinaMask}: Learning to predict masks improves state-of-the-art single-shot detection for free},
      author = {Fu, Cheng-Yang and Shvets, Mykhailo and Berg, Alexander C.},
      booktitle = {arXiv preprint arXiv:1901.03353},
      year = {2019}
    }


### Contents
1. [Installation](#installation)
2. [Models](#models)

### Installation
Follow the [maskrcnn-benchmark](./OLD_README.md) instructions to install the code and set up the datasets. Use the config files in ./configs/retina/ for training and testing.


### Models

| Models | BBox | B(time) | Mask | M(time) | Link |
| -------------- | ------------- | ---| ----- | --- | ---- |
| ResNet-50-FPN | 39.4/58.6/42.3/21.9/42.0/51.0 | 0.124 | 34.9/55.7/37.1/15.1/36.7/50.4 | 0.139 | [link](https://drive.google.com/file/d/17QnkNoibgzRnnSeLDFo27LmgPUifFBxz/view?usp=sharing) |
| ResNet-101-FPN | 41.4/60.8/44.6/23.0/44.5/53.5 | 0.145 | 36.6/58.0/39.1/16.2/38.8/52.7 | 0.160 | [link](https://drive.google.com/file/d/1Fl-TI1oDRVBFYI1h1mn7IiEOXwTIR3AC/view?usp=sharing) |
| ResNet-101-FPN-GN | 41.7/61.7/45.0/23.5/44.7/52.8 | 0.153 | 36.7/58.8/39.3/16.4/39.4/52.6 | 0.164 | [link](https://drive.google.com/file/d/19MCU9Q0YXbIrc4fRSPRV53T_RSxiVqh0/view?usp=sharing) |
| ResNeXt32x8d-101-FPN-GN | 42.6/62.5/46.0/24.8/45.6/53.8 | 0.231 | 37.4/59.8/40.0/17.6/39.9/53.4 | 0.270 | [link](https://drive.google.com/file/d/1n-tcTUZ28s0uvDOB_gZmsFtxA8flGxKF/view?usp=sharing) |

P.S. Evaluation metrics: AP, AP50, AP75, AP(small), AP(medium), AP(large); please refer to [COCO](http://cocodataset.org/#detection-eval) for a detailed explanation. The inference time is measured on an Nvidia 1080Ti.


#### Run Inference
Use the following scripts (assuming the models are downloaded to the ./models directory).

Run mask and bbox evaluation:
```
python tools/test_net.py --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml MODEL.WEIGHT ./models/retinanet_mask_R-50-FPN_2x_adjust_std011_ms_model.pth

```
Run bbox only:
```
python tools/test_net.py --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml MODEL.WEIGHT ./models/retinanet_mask_R-50-FPN_2x_adjust_std011_ms_model.pth MODEL.MASK_ON False

```
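
The repository also ships `run_test_R-50_dist.sh` for multi-GPU testing. A sketch
of the equivalent command, assuming the standard `torch.distributed.launch`
pattern inherited from maskrcnn-benchmark (adjust `--nproc_per_node` to your GPU
count):
```bash
export NGPUS=4
python -m torch.distributed.launch --nproc_per_node=$NGPUS tools/test_net.py \
    --config-file ./configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml \
    MODEL.WEIGHT ./models/retinanet_mask_R-50-FPN_2x_adjust_std011_ms_model.pth
```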
--------------------------------------------------------------------------------
/TROUBLESHOOTING.md:
--------------------------------------------------------------------------------
# Troubleshooting

Here is a compilation of common issues that you might face
while compiling / running this code:

## Compilation errors when compiling the library
If you encounter build errors like the following:
```
/usr/include/c++/6/type_traits:1558:8: note: provided for ‘template struct std::is_convertible’
 struct is_convertible
        ^~~~~~~~~~~~~~
/usr/include/c++/6/tuple:502:1: error: body of constexpr function ‘static constexpr bool std::_TC<, _Elements>::_NonNestedTuple() [with _SrcTuple = std::tuple&&; bool = true; _Elements = {at::Tensor, at::Tensor, at::Tensor, at::Tensor}]’ not a return-statement
 }
 ^
error: command '/usr/local/cuda/bin/nvcc' failed with exit status 1
```
check your CUDA version and your `gcc` version.
```
nvcc --version
gcc --version
```
If you are using CUDA 9.0 and gcc 6.4.0, then refer to https://github.com/facebookresearch/maskrcnn-benchmark/issues/25,
which has a summary of the solution. Basically, CUDA 9.0 is not compatible with gcc 6.4.0.

## ImportError: No module named maskrcnn_benchmark.config when running webcam.py

This means that `maskrcnn-benchmark` has not been properly installed.
Refer to https://github.com/facebookresearch/maskrcnn-benchmark/issues/22 for a few possible issues.
Note that we now support Python 2 as well.

## Segmentation fault (core dumped) when running the library
This probably means that you have compiled the library using GCC < 4.9, which is ABI incompatible with PyTorch.
Indeed, during installation, you probably saw a message like
```
Your compiler (g++ 4.8) may be ABI-incompatible with PyTorch!
Please use a compiler that is ABI-compatible with GCC 4.9 and above.
See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html.

See https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
for instructions on how to install GCC 4.9 or higher.
```
Follow the instructions on https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
to install GCC 4.9 or higher, and try recompiling `maskrcnn-benchmark` again, after cleaning the
`build` folder with
```
rm -rf build
```
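
If the newer compiler is installed under a versioned name, one way to point the
rebuild at it is via the `CC`/`CXX` environment variables. This is a sketch only:
the `gcc-4.9`/`g++-4.9` binary names and whether your setuptools toolchain honors
`CC`/`CXX` both depend on your distribution:
```bash
rm -rf build
CC=gcc-4.9 CXX=g++-4.9 python setup.py build develop
```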
--------------------------------------------------------------------------------
/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/arch.png
--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
DATASETS:
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x"
DATASETS:
  TEST: ("coco_2014_minival",)
--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x"
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
DATASETS:
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  RESNETS:
    STRIDE_IN_1X1: False
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
DATASETS:
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  MASK_ON: True
DATASETS:
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x"
  ROI_MASK_HEAD:
    PREDICTOR: "MaskRCNNC4Predictor"
    SHARE_BOX_FEATURE_EXTRACTOR: True
  MASK_ON: True
DATASETS:
  TEST: ("coco_2014_minival",)
--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x"
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  MASK_ON: True
DATASETS:
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  RESNETS:
    STRIDE_IN_1X1: False
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
  MASK_ON: True
DATASETS:
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
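
These caffe2 configs point `MODEL.WEIGHT` at converted Detectron checkpoints and
define only `TEST` datasets, so they are meant for evaluation rather than
training. A usage sketch, assuming the `catalog://` weight is resolved and
downloaded automatically by the checkpoint loader:
```bash
python tools/test_net.py --config-file configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml
```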
"FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | MASK_ON: True 31 | DATASETS: 32 | TEST: ("coco_2014_minival",) 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | ROI_MASK_HEAD: 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 25 | PREDICTOR: "MaskRCNNC4Predictor" 26 | POOLER_RESOLUTION: 14 27 | POOLER_SAMPLING_RATIO: 2 28 | RESOLUTION: 28 29 | SHARE_BOX_FEATURE_EXTRACTOR: False 30 | RESNETS: 31 | STRIDE_IN_1X1: False 32 | NUM_GROUPS: 32 33 | WIDTH_PER_GROUP: 8 34 | MASK_ON: True 35 | DATASETS: 36 | TEST: ("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | OUT_CHANNELS: 256 7 | RPN: 8 | USE_FPN: True 9 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 10 | PRE_NMS_TOP_N_TRAIN: 2000 11 | PRE_NMS_TOP_N_TEST: 1000 12 | POST_NMS_TOP_N_TEST: 1000 13 | FPN_POST_NMS_TOP_N_TEST: 1000 14 | ROI_HEADS: 15 | USE_FPN: True 16 | ROI_BOX_HEAD: 17 | POOLER_RESOLUTION: 7 18 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 19 | POOLER_SAMPLING_RATIO: 2 20 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 21 | PREDICTOR: "FPNPredictor" 22 | DATASETS: 23 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | SOLVER: 28 | BASE_LR: 0.02 29 | WEIGHT_DECAY: 0.0001 30 | STEPS: (60000, 80000) 31 | MAX_ITER: 90000 32 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | DATASETS: 8 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 9 | TEST: ("coco_2014_minival",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WEIGHT_DECAY: 0.0001 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | IMS_PER_BATCH: 8 16 | -------------------------------------------------------------------------------- 
--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.02
  WEIGHT_DECAY: 0.0001
  STEPS: (60000, 80000)
  MAX_ITER: 90000
--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  RESNETS:
    STRIDE_IN_1X1: False
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.01
  WEIGHT_DECAY: 0.0001
  STEPS: (120000, 160000)
  MAX_ITER: 180000
  IMS_PER_BATCH: 8
--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.02
  WEIGHT_DECAY: 0.0001
  STEPS: (60000, 80000)
  MAX_ITER: 90000
--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN:
    PRE_NMS_TOP_N_TEST: 6000
    POST_NMS_TOP_N_TEST: 1000
  ROI_MASK_HEAD:
    PREDICTOR: "MaskRCNNC4Predictor"
    SHARE_BOX_FEATURE_EXTRACTOR: True
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival",)
SOLVER:
  BASE_LR: 0.01
  WEIGHT_DECAY: 0.0001
  STEPS: (120000, 160000)
  MAX_ITER: 180000
  IMS_PER_BATCH: 8
--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.02
  WEIGHT_DECAY: 0.0001
  STEPS: (60000, 80000)
  MAX_ITER: 90000
--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  RESNETS:
    STRIDE_IN_1X1: False
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival",)
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.01
  WEIGHT_DECAY: 0.0001
  STEPS: (120000, 160000)
  MAX_ITER: 180000
  IMS_PER_BATCH: 8
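
The solver settings in these e2e configs appear to be the 8-GPU schedules
reported in MODEL_ZOO.md. A launch sketch using the `torch.distributed.launch`
pattern carried over from maskrcnn-benchmark (an assumption; adjust
`--nproc_per_node` and linearly scale the solver options for fewer GPUs):
```bash
export NGPUS=8
python -m torch.distributed.launch --nproc_per_node=$NGPUS tools/train_net.py \
    --config-file configs/e2e_mask_rcnn_R_50_FPN_1x.yaml
```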
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN:
    PRE_NMS_TOP_N_TEST: 6000
    POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    BATCH_SIZE_PER_IMAGE: 256
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 2
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
    BATCH_SIZE_PER_IMAGE: 256
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 4
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
    BATCH_SIZE_PER_IMAGE: 256
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  RESNETS:
    STRIDE_IN_1X1: False
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 2
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN:
    PRE_NMS_TOP_N_TEST: 6000
    POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    BATCH_SIZE_PER_IMAGE: 256
  ROI_MASK_HEAD:
    PREDICTOR: "MaskRCNNC4Predictor"
    SHARE_BOX_FEATURE_EXTRACTOR: True
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 4
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
    BATCH_SIZE_PER_IMAGE: 256
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 4
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
    BATCH_SIZE_PER_IMAGE: 256
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  RESNETS:
    STRIDE_IN_1X1: False
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 2
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN_ONLY: True
  RPN:
    PRE_NMS_TOP_N_TEST: 12000
    POST_NMS_TOP_N_TEST: 2000
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 4
TEST:
  IMS_PER_BATCH: 2
--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN_ONLY: True
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 2000
    FPN_POST_NMS_TOP_N_TEST: 2000
DATASETS:
  TRAIN: ("coco_2014_minival",)
  TEST: ("coco_2014_minival",)
INPUT:
  MIN_SIZE_TRAIN: 600
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1000
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (1500,)
  MAX_ITER: 2000
  IMS_PER_BATCH: 4
TEST:
  IMS_PER_BATCH: 2
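
The quick_schedules configs train and test on coco_2014_minival for only 2000
iterations; they are smoke tests of the full pipeline rather than real
baselines. A single-GPU usage sketch (illustrative, not an official schedule):
```bash
python tools/train_net.py --config-file configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml
```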
--------------------------------------------------------------------------------
/configs/retina/retinanet_R-101-FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "RetinaNet"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
  RPN_ONLY: True
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    FG_IOU_THRESHOLD: 0.5
    BG_IOU_THRESHOLD: 0.4
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
    BATCH_SIZE_PER_IMAGE: 256
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
INPUT:
  MIN_SIZE_TRAIN: (800, )
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1333
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  # Assume 4 gpus
  BASE_LR: 0.005
  WEIGHT_DECAY: 0.0001
  STEPS: (120000, 160000)
  MAX_ITER: 180000
  IMS_PER_BATCH: 8
RETINANET:
  RETINANET_ON: True
  SCALES_PER_OCTAVE: 3
  STRADDLE_THRESH: -1

--------------------------------------------------------------------------------
/configs/retina/retinanet_R-50-FPN_1x.yaml:
--------------------------------------------------------------------------------
MODEL:
  META_ARCHITECTURE: "RetinaNet"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  RPN_ONLY: True
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True
    FG_IOU_THRESHOLD: 0.5
    BG_IOU_THRESHOLD: 0.4
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)
    PRE_NMS_TOP_N_TRAIN: 2000
    PRE_NMS_TOP_N_TEST: 1000
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
    BATCH_SIZE_PER_IMAGE: 256
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
INPUT:
  MIN_SIZE_TRAIN: (800,)
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1333
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  # Assume 4 gpus
  BASE_LR: 0.01
  WEIGHT_DECAY: 0.0001
  STEPS: (60000, 80000)
  MAX_ITER: 90000
  IMS_PER_BATCH: 16
RETINANET:
  RETINANET_ON: True
  SCALES_PER_OCTAVE: 3
  STRADDLE_THRESH: -1
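
The `# Assume 4 gpus` comments mean `IMS_PER_BATCH` and `BASE_LR` are tuned for
a 4-GPU run. Any of these keys can be overridden from the command line, since
trailing arguments are merged into the yacs config (the same mechanism the
README uses for `MODEL.WEIGHT`). A 1-GPU sketch with linearly scaled solver
settings (illustrative numbers, not a validated schedule):
```bash
python tools/train_net.py \
    --config-file configs/retina/retinanet_R-50-FPN_1x.yaml \
    SOLVER.IMS_PER_BATCH 4 SOLVER.BASE_LR 0.0025 \
    SOLVER.MAX_ITER 360000 SOLVER.STEPS "(240000, 320000)"
```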
PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | BBOX_REG_BETA: 1.0 48 | SELFADJUST_SMOOTH_L1: True 49 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_adjustl1.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | SELFADJUST_SMOOTH_L1: True 48 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_beta100.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | 
WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | BBOX_REG_BETA: 1.0 48 | SELFADJUST_SMOOTH_L1: False 49 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_low_quality_0.2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | LOW_QUALITY_THRESHOLD: 0.2 48 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_low_quality_0.3.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | LOW_QUALITY_THRESHOLD: 0.3 48 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_low_quality_0.4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | 
FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | LOW_QUALITY_THRESHOLD: 0.4 48 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_no_low_quality.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | LOW_QUALITY_MATCHES: False 48 | -------------------------------------------------------------------------------- /configs/retina/retinanet_R-50-FPN_1x_no_low_quality_adjustl1.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | INPUT: 30 | MIN_SIZE_TRAIN: (800,) 31 | MAX_SIZE_TRAIN: 1333 32 | MIN_SIZE_TEST: 800 33 | 
MAX_SIZE_TEST: 1333 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 4 gpus 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | RETINANET: 44 | RETINANET_ON: True 45 | SCALES_PER_OCTAVE: 3 46 | STRADDLE_THRESH: -1 47 | LOW_QUALITY_MATCHES: False 48 | SELFADJUST_SMOOTH_L1: True 49 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_400.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (400,) 41 | MAX_SIZE_TRAIN: 667 42 | MIN_SIZE_TEST: 400 43 | MAX_SIZE_TEST: 667 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_500.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: 
("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | #TEST: ("coco_test-dev",) 40 | INPUT: 41 | MIN_SIZE_TRAIN: (500,) 42 | MAX_SIZE_TRAIN: 833 43 | MIN_SIZE_TEST: 500 44 | MAX_SIZE_TEST: 833 45 | DATALOADER: 46 | SIZE_DIVISIBILITY: 32 47 | SOLVER: 48 | # Assume 4 gpus 49 | BASE_LR: 0.005 50 | WEIGHT_DECAY: 0.0001 51 | STEPS: (180000, 240000) 52 | MAX_ITER: 270000 53 | IMS_PER_BATCH: 8 54 | RETINANET: 55 | RETINANET_ON: True 56 | BACKBONE: "p3p7" 57 | SCALES_PER_OCTAVE: 3 58 | STRADDLE_THRESH: -1 59 | NUM_MASKS_TEST: 50 60 | BBOX_REG_BETA: 0.11 61 | SELFADJUST_SMOOTH_L1: True 62 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (600,) 41 | MAX_SIZE_TRAIN: 1000 42 | MIN_SIZE_TEST: 600 43 | MAX_SIZE_TEST: 1000 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_700.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: 
"MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (700,) 41 | MAX_SIZE_TRAIN: 1167 42 | MIN_SIZE_TEST: 700 43 | MAX_SIZE_TEST: 1167 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.0025 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (360000, 480000) 51 | MAX_ITER: 540000 52 | IMS_PER_BATCH: 4 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_1.5x_adjust_std011_800.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800,) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.0025 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (360000, 480000) 51 | MAX_ITER: 540000 52 | IMS_PER_BATCH: 4 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_2x_adjust_std011_ms.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: 
"FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (640, 800, 1000) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.0025 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (480000, 640000) 51 | MAX_ITER: 720000 52 | IMS_PER_BATCH: 4 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-101-FPN_2x_adjust_std011_ms_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | USE_GN: True 37 | DATASETS: 38 | TRAIN: ("coco_2017_train",) 39 | TEST: ("coco_2017_val",) 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 800, 1000) 42 | MAX_SIZE_TRAIN: 1333 43 | MIN_SIZE_TEST: 800 44 | MAX_SIZE_TEST: 1333 45 | DATALOADER: 46 | SIZE_DIVISIBILITY: 32 47 | SOLVER: 48 | # Assume 4 gpus 49 | BASE_LR: 0.005 50 | WEIGHT_DECAY: 0.0001 51 | STEPS: (240000, 320000) 52 | MAX_ITER: 360000 53 | IMS_PER_BATCH: 8 54 | RETINANET: 55 | RETINANET_ON: True 56 | BACKBONE: "p3p7" 57 | SCALES_PER_OCTAVE: 3 58 | STRADDLE_THRESH: -1 59 | NUM_MASKS_TEST: 50 60 | BBOX_REG_BETA: 0.11 61 | SELFADJUST_SMOOTH_L1: True 62 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_1.5x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 
18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800,) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_400.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (400,) 41 | MAX_SIZE_TRAIN: 667 42 | MIN_SIZE_TEST: 400 43 | MAX_SIZE_TEST: 667 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_500.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 
2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (500,) 41 | MAX_SIZE_TRAIN: 833 42 | MIN_SIZE_TEST: 500 43 | MAX_SIZE_TEST: 833 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (600,) 41 | MAX_SIZE_TRAIN: 1000 42 | MIN_SIZE_TEST: 600 43 | MAX_SIZE_TEST: 1000 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_1.5x_adjust_std011_800.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: 
"R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800,) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (180000, 240000) 51 | MAX_ITER: 270000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800,) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (120000, 160000) 51 | MAX_ITER: 180000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_2x_adjust_std011_ms.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | 
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (640, 800, 1000) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.005 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (240000, 320000) 51 | MAX_ITER: 360000 52 | IMS_PER_BATCH: 8 53 | RETINANET: 54 | RETINANET_ON: True 55 | BACKBONE: "p3p7" 56 | SCALES_PER_OCTAVE: 3 57 | STRADDLE_THRESH: -1 58 | NUM_MASKS_TEST: 50 59 | BBOX_REG_BETA: 0.11 60 | SELFADJUST_SMOOTH_L1: True 61 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_R-50-FPN_canonical5_1.5x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | CANONICAL_LEVEL: 5 36 | MASK_ON: True 37 | DATASETS: 38 | TRAIN: ("coco_2017_train",) 39 | TEST: ("coco_2017_val",) 40 | INPUT: 41 | MIN_SIZE_TRAIN: (800,) 42 | MAX_SIZE_TRAIN: 1333 43 | MIN_SIZE_TEST: 800 44 | MAX_SIZE_TEST: 1333 45 | DATALOADER: 46 | SIZE_DIVISIBILITY: 32 47 | SOLVER: 48 | # Assume 4 gpus 49 | BASE_LR: 0.005 50 | WEIGHT_DECAY: 0.0001 51 | STEPS: (180000, 240000) 52 | MAX_ITER: 270000 53 | IMS_PER_BATCH: 8 54 | RETINANET: 55 | RETINANET_ON: True 56 | BACKBONE: "p3p7" 57 | SCALES_PER_OCTAVE: 3 58 | STRADDLE_THRESH: -1 59 | NUM_MASKS_TEST: 50 60 | -------------------------------------------------------------------------------- 
/configs/retina/retinanet_mask_R-50-FPN_canonical5_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125, 0.015625) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | CANONICAL_LEVEL: 5 36 | MASK_ON: True 37 | DATASETS: 38 | TRAIN: ("coco_2017_train",) 39 | TEST: ("coco_2017_val",) 40 | INPUT: 41 | MIN_SIZE_TRAIN: (800,) 42 | MAX_SIZE_TRAIN: 1333 43 | MIN_SIZE_TEST: 800 44 | MAX_SIZE_TEST: 1333 45 | DATALOADER: 46 | SIZE_DIVISIBILITY: 32 47 | SOLVER: 48 | # Assume 4 gpus 49 | BASE_LR: 0.005 50 | WEIGHT_DECAY: 0.0001 51 | STEPS: (120000, 160000) 52 | MAX_ITER: 180000 53 | IMS_PER_BATCH: 8 54 | RETINANET: 55 | RETINANET_ON: True 56 | BACKBONE: "p3p7" 57 | SCALES_PER_OCTAVE: 3 58 | STRADDLE_THRESH: -1 59 | NUM_MASKS_TEST: 50 60 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_X-101-32x8d_FPN_2x_adjust_std011_ms_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.125, 0.0625, 0.03125) 28 | #POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | RESNETS: 36 | STRIDE_IN_1X1: False 37 | NUM_GROUPS: 32 38 | WIDTH_PER_GROUP: 8 39 | MASK_ON: True 40 | USE_GN: True 41 | DATASETS: 42 | TRAIN: ("coco_2017_train",) 43 | TEST: ("coco_2017_val",) 44 | #TEST: ("coco_test-dev",) 45 | INPUT: 46 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800, 840, 880, 920, 960, 1000) 47 | MAX_SIZE_TRAIN: 1333 48 | MIN_SIZE_TEST: 800 49 | MAX_SIZE_TEST: 1333 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | # Assume 4 gpus 54 | BASE_LR: 0.0025 55 | WEIGHT_DECAY: 0.0001 
56 | STEPS: (480000, 640000) 57 | MAX_ITER: 720000 58 | IMS_PER_BATCH: 4 59 | RETINANET: 60 | RETINANET_ON: True 61 | BACKBONE: "p3p7" 62 | SCALES_PER_OCTAVE: 3 63 | STRADDLE_THRESH: -1 64 | NUM_MASKS_TEST: 50 65 | BBOX_REG_BETA: 0.11 66 | SELFADJUST_SMOOTH_L1: True 67 | -------------------------------------------------------------------------------- /configs/retina/retinanet_mask_p2p7_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | FG_IOU_THRESHOLD: 0.5 11 | BG_IOU_THRESHOLD: 0.4 12 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 13 | PRE_NMS_TOP_N_TRAIN: 2000 14 | PRE_NMS_TOP_N_TEST: 1000 15 | POST_NMS_TOP_N_TEST: 1000 16 | FPN_POST_NMS_TOP_N_TEST: 1000 17 | ROI_HEADS: 18 | USE_FPN: True 19 | BATCH_SIZE_PER_IMAGE: 256 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | TRAIN: ("coco_2017_train",) 37 | TEST: ("coco_2017_val",) 38 | INPUT: 39 | MIN_SIZE_TRAIN: (800,) 40 | MAX_SIZE_TRAIN: 1333 41 | MIN_SIZE_TEST: 800 42 | MAX_SIZE_TEST: 1333 43 | DATALOADER: 44 | SIZE_DIVISIBILITY: 32 45 | SOLVER: 46 | # Assume 4 gpus 47 | BASE_LR: 0.005 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (120000, 160000) 50 | MAX_ITER: 180000 51 | IMS_PER_BATCH: 8 52 | RETINANET: 53 | RETINANET_ON: True 54 | BACKBONE: "p2p7" 55 | SCALES_PER_OCTAVE: 3 56 | STRADDLE_THRESH: -1 57 | NUM_MASKS_TEST: 50 58 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | ## Webcam and Jupyter notebook demo 2 | 3 | This folder contains a simple webcam demo that illustrates how you can use `maskrcnn_benchmark` for inference. 
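`predictor.py` in this folder wraps model construction and post-processing in a `COCODemo` class. As a minimal sketch of programmatic use on a single OpenCV image, mirroring what `webcam.py` does per frame (the config and image paths below are placeholders):

```python
import cv2

from maskrcnn_benchmark.config import cfg
from predictor import COCODemo

# load a config; the path is illustrative, any config under ../configs works
cfg.merge_from_file("../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml")

coco_demo = COCODemo(cfg, confidence_threshold=0.7, min_image_size=800)

image = cv2.imread("input.jpg")  # any BGR image read with OpenCV
composite = coco_demo.run_on_opencv_image(image)  # detections drawn on top
cv2.imwrite("output.jpg", composite)
```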
4 | 5 | You can start the webcam demo from this folder using one of the following commands: 6 | ```bash 7 | # by default, it runs on the GPU 8 | # for best results, use min-image-size 800 9 | python webcam.py --min-image-size 800 10 | # can also run it on the CPU 11 | python webcam.py --min-image-size 300 MODEL.DEVICE cpu 12 | # or change the model that you want to use 13 | python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu 14 | # in order to see the probability heatmaps, pass --show-mask-heatmaps 15 | python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu 16 | ``` 17 | -------------------------------------------------------------------------------- /demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png -------------------------------------------------------------------------------- /demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png -------------------------------------------------------------------------------- /demo/webcam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import argparse 3 | import cv2 4 | 5 | from maskrcnn_benchmark.config import cfg 6 | from predictor import COCODemo 7 | 8 | import time 9 | 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo") 13 | parser.add_argument( 14 | "--config-file", 15 | default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml", 16 | metavar="FILE", 17 | help="path to config file", 18 | ) 19 | parser.add_argument( 20 | "--confidence-threshold", 21 | type=float, 22 | default=0.7, 23 | help="Minimum score for the prediction to be shown", 24 | ) 25 | parser.add_argument( 26 | "--min-image-size", 27 | type=int, 28 | default=224, 29 | help="Smallest size of the image to feed to the model. 
" 30 | "Model was trained with 800, which gives best results", 31 | ) 32 | parser.add_argument( 33 | "--show-mask-heatmaps", 34 | dest="show_mask_heatmaps", 35 | help="Show a heatmap probability for the top masks-per-dim masks", 36 | action="store_true", 37 | ) 38 | parser.add_argument( 39 | "--masks-per-dim", 40 | type=int, 41 | default=2, 42 | help="Number of heatmaps per dimension to show", 43 | ) 44 | parser.add_argument( 45 | "opts", 46 | help="Modify model config options using the command-line", 47 | default=None, 48 | nargs=argparse.REMAINDER, 49 | ) 50 | 51 | args = parser.parse_args() 52 | 53 | # load config from file and command-line arguments 54 | cfg.merge_from_file(args.config_file) 55 | cfg.merge_from_list(args.opts) 56 | cfg.freeze() 57 | 58 | # prepare object that handles inference plus adds predictions on top of image 59 | coco_demo = COCODemo( 60 | cfg, 61 | confidence_threshold=args.confidence_threshold, 62 | show_mask_heatmaps=args.show_mask_heatmaps, 63 | masks_per_dim=args.masks_per_dim, 64 | min_image_size=args.min_image_size, 65 | ) 66 | 67 | cam = cv2.VideoCapture(0) 68 | while True: 69 | start_time = time.time() 70 | ret_val, img = cam.read() 71 | composite = coco_demo.run_on_opencv_image(img) 72 | print("Time: {:.2f} s / img".format(time.time() - start_time)) 73 | cv2.imshow("COCO detections", composite) 74 | if cv2.waitKey(1) == 27: 75 | break # esc to quit 76 | cv2.destroyAllWindows() 77 | 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .defaults import _C as cfg 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return 
SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template <typename scalar_t> 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data<uint8_t>(); 30 | auto order = order_t.data<int64_t>(); 31 | auto x1 = x1_t.data<scalar_t>(); 32 | auto y1 = y1_t.data<scalar_t>(); 33 | auto x2 = x2_t.data<scalar_t>(); 34 | auto y2 = y2_t.data<scalar_t>(); 35 | auto areas = areas_t.data<scalar_t>(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel<scalar_t>(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/vision.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor SigmoidFocalLoss_forward_cuda( 7 | const at::Tensor& logits, 8 | const at::Tensor& targets, 9 | const int num_classes, 10 | const float gamma, 11 | const float alpha); 12 | 13 | at::Tensor SigmoidFocalLoss_backward_cuda( 14 | const at::Tensor& logits, 15 | const at::Tensor& targets, 16 | const at::Tensor& d_losses, 17 | const int num_classes, 18 | const float gamma, 19 | const float alpha); 20 | 21 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 22 | const at::Tensor& rois, 23 | const float spatial_scale, 24 | const int pooled_height, 25 | const int pooled_width, 26 | const int sampling_ratio); 27 | 28 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 29 | const at::Tensor& rois, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width, 37 | const int sampling_ratio); 38 | 39 | 40 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(const at::Tensor& input, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width); 45 | 46 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 47 | const at::Tensor& input, 48 | const at::Tensor& rois, 49 | const at::Tensor& argmax, 50 | const float spatial_scale, 51 | const int pooled_height, 52 | const int pooled_width, 53 | const int batch_size, 54 | const int channels, 55 | const int height, 56 | const int width); 57 | 58 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 59 | 60 | 61 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 62 | const int height, 63 | const int width); 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
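// Dispatch note: nms_cuda expects a single [N, 5] tensor with the score
// appended as a fifth column, so the wrapper below concatenates dets and
// scores before the CUDA call, while nms_cpu takes them as separate tensors.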
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "SigmoidFocalLoss.h" 6 | 7 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 8 | m.def("nms", &nms, "non-maximum suppression"); 9 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 10 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 11 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 12 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 13 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 14 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 15 | } 16 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_data_loader 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.structures.image_list import to_image_list 3 | 4 | 5 | class BatchCollator(object): 6 | """ 7 | From a list of samples from the dataset, 8 | returns the batched images and targets. 9 | This should be passed to the DataLoader 10 | """ 11 | 12 | def __init__(self, size_divisible=0): 13 | self.size_divisible = size_divisible 14 | 15 | def __call__(self, batch): 16 | transposed_batch = list(zip(*batch)) 17 | images = to_image_list(transposed_batch[0], self.size_divisible) 18 | targets = transposed_batch[1] 19 | img_ids = transposed_batch[2] 20 | return images, targets, img_ids 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
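[Illustrative aside, not from the repository sources: how BatchCollator above is meant to plug into a torch DataLoader. Here `dataset` stands for any of the dataset classes below, and size_divisible=32 is an assumption matching the usual FPN stride requirement.]

from torch.utils.data import DataLoader

collator = BatchCollator(size_divisible=32)
loader = DataLoader(dataset, batch_size=2, num_workers=4, collate_fn=collator)
images, targets, img_ids = next(iter(loader))  # images is an ImageList padded to a multiple of 32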
2 | from .coco import COCODataset 3 | from .concat_dataset import ConcatDataset 4 | 5 | __all__ = ["COCODataset", "ConcatDataset"] 6 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import torchvision 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask 7 | 8 | 9 | class COCODataset(torchvision.datasets.coco.CocoDetection): 10 | def __init__( 11 | self, ann_file, root, remove_images_without_annotations, transforms=None 12 | ): 13 | super(COCODataset, self).__init__(root, ann_file) 14 | 15 | # sort indices for reproducible results 16 | self.ids = sorted(self.ids) 17 | 18 | # filter images without detection annotations 19 | if remove_images_without_annotations: 20 | self.ids = [ 21 | img_id 22 | for img_id in self.ids 23 | if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 24 | ] 25 | 26 | self.json_category_id_to_contiguous_id = { 27 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 28 | } 29 | self.contiguous_category_id_to_json_id = { 30 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 31 | } 32 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 33 | self.transforms = transforms 34 | 35 | def __getitem__(self, idx): 36 | img, anno = super(COCODataset, self).__getitem__(idx) 37 | 38 | # filter crowd annotations 39 | # TODO might be better to add an extra field 40 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 41 | 42 | boxes = [obj["bbox"] for obj in anno] 43 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes 44 | target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") 45 | 46 | classes = [obj["category_id"] for obj in anno] 47 | classes = [self.json_category_id_to_contiguous_id[c] for c in classes] 48 | classes = torch.tensor(classes) 49 | target.add_field("labels", classes) 50 | 51 | masks = [obj["segmentation"] for obj in anno] 52 | masks = SegmentationMask(masks, img.size) 53 | target.add_field("masks", masks) 54 | 55 | target = target.clip_to_image(remove_empty=True) 56 | 57 | if self.transforms is not None: 58 | img, target = self.transforms(img, target) 59 | 60 | return img, target, idx 61 | 62 | def get_img_info(self, index): 63 | img_id = self.id_to_img_map[index] 64 | img_data = self.coco.imgs[img_id] 65 | return img_data 66 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
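[Illustrative aside, not from the repository sources: a worked example of the category remapping in COCODataset above. COCO's JSON category ids are non-contiguous (e.g. 12 is unused), so they are packed into 1..N, with 0 left for the background class.]

cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13]   # as returned by coco.getCatIds(); note the gap at 12
mapping = {v: i + 1 for i, v in enumerate(cat_ids)}  # same comprehension as in __init__ above
assert mapping[11] == 11 and mapping[13] == 12       # the gap closes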
2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the size of an image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/list_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Simple dataset class that wraps a list of path names 4 | """ 5 | 6 | from PIL import Image 7 | 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList 9 | 10 | 11 | class ListDataset(object): 12 | def __init__(self, image_lists, transforms=None): 13 | self.image_lists = image_lists 14 | self.transforms = transforms 15 | 16 | def __getitem__(self, item): 17 | img = Image.open(self.image_lists[item]).convert("RGB") 18 | 19 | # dummy target 20 | w, h = img.size 21 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 22 | 23 | if self.transforms is not None: 24 | img, target = self.transforms(img, target) 25 | 26 | return img, target 27 | 28 | def __len__(self): 29 | return len(self.image_lists) 30 | 31 | def get_img_info(self, item): 32 | """ 33 | Return the image dimensions for the image, without 34 | loading and pre-processing it 35 | """ 36 | pass 37 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed, 3 | # with a modification in the import to use the deprecated backend 4 | # FIXME remove this once c10d fixes the bug it has 5 | import math 6 | import torch 7 | import torch.distributed.deprecated as dist 8 | from torch.utils.data.sampler import Sampler 9 | 10 | 11 | class DistributedSampler(Sampler): 12 | """Sampler that restricts data loading to a subset of the dataset. 13 | It is especially useful in conjunction with 14 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 15 | process can pass a DistributedSampler instance as a DataLoader sampler, 16 | and load a subset of the original dataset that is exclusive to it. 17 | .. note:: 18 | Dataset is assumed to be of constant size.
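[Illustrative aside, not from the repository sources: a worked example of get_idxs above. For two concatenated datasets of lengths 5 and 3, cumulative_sizes is [5, 8]; global index 6 lands in the second dataset at local index 1.]

import bisect

cumulative_sizes = [5, 8]
idx = 6
dataset_idx = bisect.bisect_right(cumulative_sizes, idx)   # -> 1
sample_idx = idx - cumulative_sizes[dataset_idx - 1]       # -> 6 - 5 = 1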
19 | Arguments: 20 | dataset: Dataset used for sampling. 21 | num_replicas (optional): Number of processes participating in 22 | distributed training. 23 | rank (optional): Rank of the current process within num_replicas. 24 | """ 25 | 26 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 27 | if num_replicas is None: 28 | if not dist.is_available(): 29 | raise RuntimeError("Requires distributed package to be available") 30 | num_replicas = dist.get_world_size() 31 | if rank is None: 32 | if not dist.is_available(): 33 | raise RuntimeError("Requires distributed package to be available") 34 | rank = dist.get_rank() 35 | self.dataset = dataset 36 | self.num_replicas = num_replicas 37 | self.rank = rank 38 | self.epoch = 0 39 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 40 | self.total_size = self.num_samples * self.num_replicas 41 | self.shuffle = shuffle 42 | 43 | def __iter__(self): 44 | if self.shuffle: 45 | # deterministically shuffle based on epoch 46 | g = torch.Generator() 47 | g.manual_seed(self.epoch) 48 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 49 | else: 50 | indices = torch.arange(len(self.dataset)).tolist() 51 | 52 | # add extra samples to make it evenly divisible 53 | indices += indices[: (self.total_size - len(indices))] 54 | assert len(indices) == self.total_size 55 | 56 | # subsample 57 | offset = self.num_samples * self.rank 58 | indices = indices[offset : offset + self.num_samples] 59 | assert len(indices) == self.num_samples 60 | 61 | return iter(indices) 62 | 63 | def __len__(self): 64 | return self.num_samples 65 | 66 | def set_epoch(self, epoch): 67 | self.epoch = epoch 68 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
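[Illustrative aside, not from the repository sources: the padding and sharding arithmetic from DistributedSampler.__iter__ above, worked for a toy case. With 10 samples over 4 replicas, each rank gets ceil(10/4) = 3 indices and the first two indices repeat as padding.]

import math

dataset_len, num_replicas = 10, 4
num_samples = int(math.ceil(dataset_len * 1.0 / num_replicas))   # 3
total_size = num_samples * num_replicas                          # 12
indices = list(range(dataset_len))
indices += indices[: total_size - len(indices)]                  # [0..9, 0, 1]
shards = [indices[r * num_samples:(r + 1) * num_samples] for r in range(num_replicas)]
# shards -> [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]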
2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | 10 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | flip_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 10 | resize = T.MultiScaleResize(min_size, max_size) 11 | else: 12 | min_size = cfg.INPUT.MIN_SIZE_TEST 13 | max_size = cfg.INPUT.MAX_SIZE_TEST 14 | flip_prob = 0 15 | resize = T.Resize(min_size, max_size) 16 | 17 | to_bgr255 = cfg.INPUT.TO_BGR255 18 | normalize_transform = T.Normalize( 19 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 20 | ) 21 | 22 | transform = T.Compose( 23 | [ 24 | resize, 25 | T.RandomHorizontalFlip(flip_prob), 26 | T.ToTensor(), 27 | normalize_transform, 28 | ] 29 | ) 30 | return transform 31 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import random 3 | 4 | import torch 5 | import torchvision 6 | from torchvision.transforms import functional as F 7 | 8 | 9 | class Compose(object): 10 | def __init__(self, transforms): 11 | self.transforms = transforms 12 | 13 | def __call__(self, image, target): 14 | for t in self.transforms: 15 | image, target = t(image, target) 16 | return image, target 17 | 18 | def __repr__(self): 19 | format_string = self.__class__.__name__ + "(" 20 | for t in self.transforms: 21 | format_string += "\n" 22 | format_string += " {0}".format(t) 23 | format_string += "\n)" 24 | return format_string 25 | 26 | 27 | class Resize(object): 28 | def __init__(self, min_size, max_size): 29 | self.min_size = min_size 30 | self.max_size = max_size 31 | 32 | # modified from torchvision to add support for max size 33 | def get_size(self, image_size): 34 | w, h = image_size 35 | size = self.min_size 36 | max_size = self.max_size 37 | if max_size is not None: 38 | min_original_size = float(min((w, h))) 39 | max_original_size = float(max((w, h))) 40 | if max_original_size / min_original_size * size > max_size: 41 | size = int(round(max_size * min_original_size / max_original_size)) 42 | 43 | if (w <= h and w == size) or (h <= w and h == size): 44 | return (h, w) 45 | 46 | if w < h: 47 | ow = size 48 | oh = int(size * h / w) 49 | else: 50 | oh = size 51 | ow = int(size * w / h) 52 | 53 | return (oh, ow) 54 | 55 | def __call__(self, image, target): 56 | size = self.get_size(image.size) 57 | image = F.resize(image, size) 58 | target = target.resize(image.size) 59 | return image, target 60 | 61 | 62 | class MultiScaleResize(object): 63 | def __init__(self, min_sizes, max_size): 64 | self.resizers = [] 65 | for min_size in min_sizes: 66 | self.resizers.append(Resize(min_size, max_size)) 67 | 68 | def __call__(self, image, target): 69 | resizer = random.choice(self.resizers) 70 | image, target = resizer(image, target) 71 | 72 | return image, 
target 73 | 74 | 75 | class RandomHorizontalFlip(object): 76 | def __init__(self, prob=0.5): 77 | self.prob = prob 78 | 79 | def __call__(self, image, target): 80 | if random.random() < self.prob: 81 | image = F.hflip(image) 82 | target = target.transpose(0) 83 | return image, target 84 | 85 | 86 | class ToTensor(object): 87 | def __call__(self, image, target): 88 | return F.to_tensor(image), target 89 | 90 | 91 | class Normalize(object): 92 | def __init__(self, mean, std, to_bgr255=True): 93 | self.mean = mean 94 | self.std = std 95 | self.to_bgr255 = to_bgr255 96 | 97 | def __call__(self, image, target): 98 | if self.to_bgr255: 99 | image = image[[2, 1, 0]] * 255 100 | image = F.normalize(image, mean=self.mean, std=self.std) 101 | return image, target 102 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/trainer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import datetime 3 | import logging 4 | import time 5 | 6 | import torch 7 | from torch.distributed import deprecated as dist 8 | 9 | from maskrcnn_benchmark.utils.comm import get_world_size, get_rank 10 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 11 | 12 | 13 | def reduce_loss_dict(loss_dict): 14 | """ 15 | Reduce the loss dictionary from all processes so that process with rank 16 | 0 has the averaged results. Returns a dict with the same fields as 17 | loss_dict, after reduction. 
18 | """ 19 | world_size = get_world_size() 20 | if world_size < 2: 21 | return loss_dict 22 | with torch.no_grad(): 23 | loss_names = [] 24 | all_losses = [] 25 | for k, v in loss_dict.items(): 26 | loss_names.append(k) 27 | all_losses.append(v) 28 | all_losses = torch.stack(all_losses, dim=0) 29 | dist.reduce(all_losses, dst=0) 30 | if dist.get_rank() == 0: 31 | # only main process gets accumulated, so only divide by 32 | # world_size in this case 33 | all_losses /= world_size 34 | reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} 35 | return reduced_losses 36 | 37 | 38 | def do_train( 39 | model, 40 | data_loader, 41 | optimizer, 42 | scheduler, 43 | checkpointer, 44 | device, 45 | checkpoint_period, 46 | arguments, 47 | ): 48 | logger = logging.getLogger("maskrcnn_benchmark.trainer") 49 | logger.info("Start training") 50 | meters = MetricLogger(delimiter=" ") 51 | max_iter = len(data_loader) 52 | start_iter = arguments["iteration"] 53 | model.train() 54 | start_training_time = time.time() 55 | end = time.time() 56 | for iteration, (images, targets, _) in enumerate(data_loader, start_iter): 57 | data_time = time.time() - end 58 | arguments["iteration"] = iteration 59 | 60 | scheduler.step() 61 | 62 | images = images.to(device) 63 | targets = [target.to(device) for target in targets] 64 | 65 | loss_dict = model(images, targets) 66 | 67 | losses = sum(loss for loss in loss_dict.values()) 68 | 69 | # reduce losses over all GPUs for logging purposes 70 | loss_dict_reduced = reduce_loss_dict(loss_dict) 71 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 72 | meters.update(loss=losses_reduced, **loss_dict_reduced) 73 | 74 | optimizer.zero_grad() 75 | losses.backward() 76 | optimizer.step() 77 | 78 | batch_time = time.time() - end 79 | end = time.time() 80 | meters.update(time=batch_time, data=data_time) 81 | 82 | eta_seconds = meters.time.global_avg * (max_iter - iteration) 83 | eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) 84 | 85 | #if iteration % 20 == 0 or iteration == (max_iter - 1): 86 | if True: 87 | logger.info( 88 | meters.delimiter.join( 89 | [ 90 | "eta: {eta}", 91 | "iter: {iter}", 92 | "{meters}", 93 | "lr: {lr:.6f}", 94 | "max mem: {memory:.0f}", 95 | ] 96 | ).format( 97 | eta=eta_string, 98 | iter=iteration, 99 | meters=str(meters), 100 | lr=optimizer.param_groups[0]["lr"], 101 | memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, 102 | ) 103 | ) 104 | if iteration % checkpoint_period == 0 and iteration > 0: 105 | checkpointer.save("model_{:07d}".format(iteration+1), **arguments) 106 | 107 | checkpointer.save("model_{:07d}".format(iteration), **arguments) 108 | total_training_time = time.time() - start_training_time 109 | total_time_str = str(datetime.timedelta(seconds=total_training_time)) 110 | logger.info( 111 | "Total training time: {} ({:.4f} s / it)".format( 112 | total_time_str, total_training_time / (max_iter) 113 | ) 114 | ) 115 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d 5 | from .misc import Conv2d 6 | from .misc import ConvTranspose2d 7 | from .misc import interpolate 8 | from .nms import nms 9 | from .roi_align import ROIAlign 10 | from .roi_align import roi_align 11 | from .roi_pool import ROIPool 12 | from .roi_pool import roi_pool 13 | from .smooth_l1_loss import smooth_l1_loss, SmoothL1Loss 14 | from .sigmoid_focal_loss import SigmoidFocalLoss 15 | from .adjust_smooth_l1_loss import AdjustSmoothL1Loss 16 | 17 | __all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", 18 | "smooth_l1_loss", "SmoothL1Loss", "Conv2d", "ConvTranspose2d", 19 | "interpolate", "FrozenBatchNorm2d", "SigmoidFocalLoss", 20 | "AdjustSmoothL1Loss"] 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import glob 3 | import os.path 4 | 5 | import torch 6 | 7 | try: 8 | from torch.utils.cpp_extension import load as load_ext 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | except ImportError: 11 | raise ImportError("The cpp layer extensions require PyTorch 0.4 or higher") 12 | 13 | 14 | def _load_C_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | this_dir = os.path.dirname(this_dir) 17 | this_dir = os.path.join(this_dir, "csrc") 18 | 19 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 22 | 23 | source = main_file + source_cpu 24 | 25 | extra_cflags = [] 26 | if torch.cuda.is_available() and CUDA_HOME is not None: 27 | source.extend(source_cuda) 28 | extra_cflags = ["-DWITH_CUDA"] 29 | source = [os.path.join(this_dir, s) for s in source] 30 | extra_include_paths = [this_dir] 31 | return load_ext( 32 | "torchvision", 33 | source, 34 | extra_cflags=extra_cflags, 35 | extra_include_paths=extra_include_paths, 36 | ) 37 | 38 | 39 | _C = _load_C_extensions() 40 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/adjust_smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | import logging 5 | from torch.distributed import deprecated as dist 6 | 7 | class AdjustSmoothL1Loss(nn.Module): 8 | 9 | def __init__(self, num_features, momentum=0.1, beta=1.
/ 9): 10 | super(AdjustSmoothL1Loss, self).__init__() 11 | self.num_features = num_features 12 | self.momentum = momentum 13 | self.beta = beta 14 | self.register_buffer( 15 | 'running_mean', torch.empty(num_features).fill_(beta) 16 | ) 17 | self.register_buffer('running_var', torch.zeros(num_features)) 18 | self.logger = logging.getLogger("maskrcnn_benchmark.trainer") 19 | 20 | def forward(self, inputs, target, size_average=True): 21 | 22 | n = torch.abs(inputs - target) 23 | with torch.no_grad(): 24 | if torch.isnan(n.var(dim=0)).sum().item() == 0: 25 | self.running_mean = self.running_mean.to(n.device) 26 | self.running_mean *= (1 - self.momentum) 27 | self.running_mean += (self.momentum * n.mean(dim=0)) 28 | self.running_var = self.running_var.to(n.device) 29 | self.running_var *= (1 - self.momentum) 30 | self.running_var += (self.momentum * n.var(dim=0)) 31 | 32 | 33 | beta = (self.running_mean - self.running_var) 34 | beta = beta.clamp(max=self.beta, min=1e-3) 35 | 36 | beta = beta.view(-1, self.num_features).to(n.device) 37 | cond = n < beta.expand_as(n) 38 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 39 | if size_average: 40 | return loss.mean() 41 | return loss.sum() 42 | 43 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/batch_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class FrozenBatchNorm2d(nn.Module): 7 | """ 8 | BatchNorm2d where the batch statistics and the affine parameters 9 | are fixed 10 | """ 11 | 12 | def __init__(self, n): 13 | super(FrozenBatchNorm2d, self).__init__() 14 | self.register_buffer("weight", torch.ones(n)) 15 | self.register_buffer("bias", torch.zeros(n)) 16 | self.register_buffer("running_mean", torch.zeros(n)) 17 | self.register_buffer("running_var", torch.ones(n)) 18 | 19 | def forward(self, x): 20 | scale = self.weight * self.running_var.rsqrt() 21 | bias = self.bias - self.running_mean * scale 22 | scale = scale.reshape(1, -1, 1, 1) 23 | bias = bias.reshape(1, -1, 1, 1) 24 | return x * scale + bias 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | helper class that supports empty tensors on some nn functions. 4 | 5 | Ideally, add support directly in PyTorch to empty tensors in 6 | those functions.
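[Illustrative aside, not from the repository sources: a sketch checking that FrozenBatchNorm2d above reproduces an eval-mode BatchNorm2d with eps=0 once the buffers agree; the random statistics are made up for the check.]

import torch
from torch import nn

frozen = FrozenBatchNorm2d(8)
bn = nn.BatchNorm2d(8, eps=0.0).eval()
with torch.no_grad():
    frozen.running_mean.normal_()            # pretend these were loaded from a checkpoint
    frozen.running_var.uniform_(0.5, 1.5)
    bn.running_mean.copy_(frozen.running_mean)
    bn.running_var.copy_(frozen.running_var)
x = torch.randn(2, 8, 4, 4)
assert torch.allclose(frozen(x), bn(x), atol=1e-5)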
7 | 8 | This can be removed once https://github.com/pytorch/pytorch/issues/12013 9 | is implemented 10 | """ 11 | 12 | import math 13 | import torch 14 | from torch.nn.modules.utils import _ntuple 15 | 16 | 17 | class _NewEmptyTensorOp(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, new_shape): 20 | ctx.shape = x.shape 21 | return x.new_empty(new_shape) 22 | 23 | @staticmethod 24 | def backward(ctx, grad): 25 | shape = ctx.shape 26 | return _NewEmptyTensorOp.apply(grad, shape), None 27 | 28 | 29 | 30 | class Conv2d(torch.nn.Conv2d): 31 | def forward(self, x): 32 | if x.numel() > 0: 33 | return super(Conv2d, self).forward(x) 34 | # get output shape 35 | 36 | output_shape = [ 37 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 38 | for i, p, di, k, d in zip( 39 | x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride 40 | ) 41 | ] 42 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape 43 | return _NewEmptyTensorOp.apply(x, output_shape) 44 | 45 | 46 | class ConvTranspose2d(torch.nn.ConvTranspose2d): 47 | def forward(self, x): 48 | if x.numel() > 0: 49 | return super(ConvTranspose2d, self).forward(x) 50 | # get output shape 51 | 52 | output_shape = [ 53 | (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op 54 | for i, p, di, k, d, op in zip( 55 | x.shape[-2:], 56 | self.padding, 57 | self.dilation, 58 | self.kernel_size, 59 | self.stride, 60 | self.output_padding, 61 | ) 62 | ] 63 | output_shape = [x.shape[0], self.bias.shape[0]] + output_shape 64 | return _NewEmptyTensorOp.apply(x, output_shape) 65 | 66 | 67 | def interpolate( 68 | input, size=None, scale_factor=None, mode="nearest", align_corners=None 69 | ): 70 | if input.numel() > 0: 71 | return torch.nn.functional.interpolate( 72 | input, size, scale_factor, mode, align_corners 73 | ) 74 | 75 | def _check_size_scale_factor(dim): 76 | if size is None and scale_factor is None: 77 | raise ValueError("either size or scale_factor should be defined") 78 | if size is not None and scale_factor is not None: 79 | raise ValueError("only one of size or scale_factor should be defined") 80 | if ( 81 | scale_factor is not None 82 | and isinstance(scale_factor, tuple) 83 | and len(scale_factor) != dim 84 | ): 85 | raise ValueError( 86 | "scale_factor shape must match input shape. " 87 | "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) 88 | ) 89 | 90 | def _output_size(dim): 91 | _check_size_scale_factor(dim) 92 | if size is not None: 93 | return size 94 | scale_factors = _ntuple(dim)(scale_factor) 95 | # math.floor might return float in py2.7 96 | return [ 97 | int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) 98 | ] 99 | 100 | output_shape = tuple(_output_size(2)) 101 | output_shape = input.shape[:-2] + output_shape 102 | return _NewEmptyTensorOp.apply(input, output_shape) 103 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from maskrcnn_benchmark import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIAlign(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None 45 | 46 | 47 | roi_align = _ROIAlign.apply 48 | 49 | 50 | class ROIAlign(nn.Module): 51 | def __init__(self, output_size, spatial_scale, sampling_ratio): 52 | super(ROIAlign, self).__init__() 53 | self.output_size = output_size 54 | self.spatial_scale = spatial_scale 55 | self.sampling_ratio = sampling_ratio 56 | 57 | def forward(self, input, rois): 58 | return roi_align( 59 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 60 | ) 61 | 62 | def __repr__(self): 63 | tmpstr = self.__class__.__name__ + "(" 64 | tmpstr += "output_size=" + str(self.output_size) 65 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 66 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 67 | tmpstr += ")" 68 | return tmpstr 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
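[Illustrative aside, not from the repository sources: a minimal forward call of the ROIAlign module above. Each roi row is (batch_index, x1, y1, x2, y2) in input-image coordinates, which spatial_scale projects onto the feature map; the shapes and numbers here are made up.]

import torch

align = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=2)
features = torch.randn(1, 256, 50, 50)                   # a stride-16 feature map
rois = torch.tensor([[0.0, 32.0, 32.0, 160.0, 160.0]])   # one box on image 0
out = align(features, rois)
print(out.shape)                                         # torch.Size([1, 256, 7, 7])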
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from maskrcnn_benchmark import _C 7 | 8 | 9 | class _SigmoidFocalLoss(Function): 10 | @staticmethod 11 | def forward(ctx, logits, targets, num_classes, gamma, alpha): 12 | ctx.save_for_backward(logits, targets) 13 | ctx.num_classes = num_classes 14 | ctx.gamma = gamma 15 | ctx.alpha = alpha 16 | 17 | losses = _C.sigmoid_focalloss_forward( 18 | logits, targets, num_classes, gamma, alpha 19 | ) 20 | return losses 21 | 22 | @staticmethod 23 | @once_differentiable 24 | def backward(ctx, d_loss): 25 | logits, targets = ctx.saved_tensors 26 | num_classes = ctx.num_classes 27 | gamma = ctx.gamma 28 | alpha = ctx.alpha 29 | d_loss = d_loss.contiguous() 30 | d_logits = _C.sigmoid_focalloss_backward( 31 | logits, targets, d_loss, num_classes, gamma, alpha 32 | ) 33 | return d_logits, None, None, None, None 34 | 35 | 36 | sigmoid_focalloss = _SigmoidFocalLoss.apply 37 | 38 | 39 | class SigmoidFocalLoss(nn.Module): 40 | def __init__(self, num_classes, gamma, alpha): 41 | super(SigmoidFocalLoss, self).__init__() 42 | self.num_classes = num_classes 43 | self.gamma = gamma 44 | self.alpha = alpha 45 | 46 | def forward(self, logits, targets): 47 | loss = sigmoid_focalloss( 48 | logits, targets, self.num_classes, self.gamma, self.alpha 49 | ) 50 | return loss.sum() 51 | 52 | def __repr__(self): 53 | tmpstr = self.__class__.__name__ + "(" 54 | tmpstr += "num_classes=" + str(self.num_classes) 55 | tmpstr += ", gamma=" + str(self.gamma) 56 | tmpstr += ", alpha=" +
str(self.alpha) 57 | tmpstr += ")" 58 | return tmpstr 59 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class SmoothL1Loss(torch.nn.Module): 6 | def __init__(self, beta=1. / 9): 7 | super(SmoothL1Loss, self).__init__() 8 | self.beta = beta 9 | 10 | def forward(self, input, target, size_average=True): 11 | return smooth_l1_loss(input, target, self.beta, size_average) 12 | 13 | 14 | # TODO maybe push this to nn? 15 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 16 | """ 17 | very similar to the smooth_l1_loss from pytorch, but with 18 | the extra beta parameter 19 | """ 20 | n = torch.abs(input - target) 21 | cond = n < beta 22 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 23 | if size_average: 24 | return loss.mean() 25 | return loss.sum() 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/maskrcnn_benchmark/modeling/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .backbone import build_backbone 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | 4 | from torch import nn 5 | 6 | from . import fpn as fpn_module 7 | from .
import resnet 8 | 9 | 10 | def build_resnet_backbone(cfg): 11 | body = resnet.ResNet(cfg) 12 | model = nn.Sequential(OrderedDict([("body", body)])) 13 | return model 14 | 15 | 16 | def build_resnet_fpn_backbone(cfg): 17 | body = resnet.ResNet(cfg) 18 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 19 | out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS 20 | fpn = fpn_module.FPN( 21 | in_channels_list=[ 22 | in_channels_stage2, 23 | in_channels_stage2 * 2, 24 | in_channels_stage2 * 4, 25 | in_channels_stage2 * 8, 26 | ], 27 | out_channels=out_channels, 28 | top_blocks=fpn_module.LastLevelMaxPool(), 29 | use_gn=cfg.MODEL.USE_GN 30 | ) 31 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 32 | return model 33 | 34 | 35 | def build_resnet_fpn_p3p7_backbone(cfg): 36 | body = resnet.ResNet(cfg) 37 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 38 | out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS 39 | fpn = fpn_module.FPN( 40 | in_channels_list=[ 41 | 0, 42 | in_channels_stage2 * 2, 43 | in_channels_stage2 * 4, 44 | in_channels_stage2 * 8, 45 | ], 46 | out_channels=out_channels, 47 | top_blocks=fpn_module.LastLevelP6P7(out_channels), 48 | use_gn=cfg.MODEL.USE_GN 49 | ) 50 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 51 | return model 52 | 53 | 54 | _BACKBONES = {"resnet": build_resnet_backbone, 55 | "resnet-fpn": build_resnet_fpn_backbone, 56 | "resnet-fpn-retina": build_resnet_fpn_p3p7_backbone, 57 | } 58 | 59 | 60 | def build_resnet_fpn_p2p7_backbone(cfg): 61 | body = resnet.ResNet(cfg) 62 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 63 | out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS 64 | fpn = fpn_module.FPN( 65 | in_channels_list=[ 66 | in_channels_stage2, 67 | in_channels_stage2 * 2, 68 | in_channels_stage2 * 4, 69 | in_channels_stage2 * 8, 70 | ], 71 | out_channels=out_channels, 72 | top_blocks=fpn_module.LastLevelP6P7(out_channels), 73 | use_gn=cfg.MODEL.USE_GN 74 | ) 75 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 76 | return model 77 | 78 | 79 | _BACKBONES = {"resnet": build_resnet_backbone, 80 | "resnet-fpn": build_resnet_fpn_backbone, 81 | "resnet-fpn-retina": build_resnet_fpn_p3p7_backbone, 82 | } 83 | 84 | 85 | 86 | def build_backbone(cfg): 87 | assert cfg.MODEL.BACKBONE.CONV_BODY.startswith( 88 | "R-" 89 | ), "Only ResNet and ResNeXt models are currently implemented" 90 | # Models using FPN end with "-FPN" 91 | if cfg.MODEL.BACKBONE.CONV_BODY.endswith("-FPN"): 92 | if cfg.RETINANET.RETINANET_ON: 93 | if cfg.RETINANET.BACKBONE == "p3p7": 94 | return build_resnet_fpn_p3p7_backbone(cfg) 95 | elif cfg.RETINANET.BACKBONE == "p2p7": 96 | return build_resnet_fpn_p2p7_backbone(cfg) 97 | else: 98 | raise Exception("Wrong Setting {}:{}".format( 99 | 'cfg.RETINANET.BACKBONE', cfg.RETINANET.BACKBONE)) 100 | else: 101 | return build_resnet_fpn_backbone(cfg) 102 | return build_resnet_backbone(cfg) 103 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
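[Illustrative aside, not from the repository sources: the channel arithmetic behind in_channels_list in the FPN builders above, worked for ResNet-50, where RES2_OUT_CHANNELS is 256.]

in_channels_stage2 = 256
in_channels_list = [in_channels_stage2 * f for f in (1, 2, 4, 8)]
# -> [256, 512, 1024, 2048], the C2-C5 outputs the FPN laterals consume;
# each is projected to cfg.MODEL.BACKBONE.OUT_CHANNELS (typically 256).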
2 | import torch 3 | 4 | 5 | class BalancedPositiveNegativeSampler(object): 6 | """ 7 | This class samples batches, ensuring that they contain a fixed proportion of positives 8 | """ 9 | 10 | def __init__(self, batch_size_per_image, positive_fraction): 11 | """ 12 | Arguments: 13 | batch_size_per_image (int): number of elements to be selected per image 14 | positive_fraction (float): percentage of positive elements per batch 15 | """ 16 | self.batch_size_per_image = batch_size_per_image 17 | self.positive_fraction = positive_fraction 18 | 19 | def __call__(self, matched_idxs): 20 | """ 21 | Arguments: 22 | matched_idxs: list of tensors containing -1, 0 or positive values. 23 | Each tensor corresponds to a specific image. 24 | -1 values are ignored, 0 are considered as negatives and > 0 as 25 | positives. 26 | 27 | Returns: 28 | pos_idx (list[tensor]) 29 | neg_idx (list[tensor]) 30 | 31 | Returns two lists of binary masks for each image. 32 | The first list contains the positive elements that were selected, 33 | and the second list the negative examples. 34 | """ 35 | pos_idx = [] 36 | neg_idx = [] 37 | for matched_idxs_per_image in matched_idxs: 38 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 39 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 40 | 41 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = self.batch_size_per_image - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx_per_image = positive[perm1] 53 | neg_idx_per_image = negative[perm2] 54 | 55 | # create binary mask from indices 56 | pos_idx_per_image_mask = torch.zeros_like( 57 | matched_idxs_per_image, dtype=torch.uint8 58 | ) 59 | neg_idx_per_image_mask = torch.zeros_like( 60 | matched_idxs_per_image, dtype=torch.uint8 61 | ) 62 | pos_idx_per_image_mask[pos_idx_per_image] = 1 63 | neg_idx_per_image_mask[neg_idx_per_image] = 1 64 | 65 | pos_idx.append(pos_idx_per_image_mask) 66 | neg_idx.append(neg_idx_per_image_mask) 67 | 68 | return pos_idx, neg_idx 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class BoxCoder(object): 8 | """ 9 | This class encodes and decodes a set of bounding boxes into 10 | the representation used for training the regressors. 11 | """ 12 | 13 | def __init__(self, weights, bbox_xform_clip=math.log(1000.
/ 16)): 14 | """ 15 | Arguments: 16 | weights (4-element tuple) 17 | bbox_xform_clip (float) 18 | """ 19 | self.weights = weights 20 | self.bbox_xform_clip = bbox_xform_clip 21 | 22 | def encode(self, reference_boxes, proposals): 23 | """ 24 | Encode a set of proposals with respect to some 25 | reference boxes 26 | 27 | Arguments: 28 | reference_boxes (Tensor): reference boxes 29 | proposals (Tensor): boxes to be encoded 30 | """ 31 | 32 | TO_REMOVE = 1 # TODO remove 33 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 34 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 35 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 36 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 37 | 38 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 39 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 40 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 41 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 42 | 43 | wx, wy, ww, wh = self.weights 44 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 45 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 46 | targets_dw = ww * torch.log(gt_widths / ex_widths) 47 | targets_dh = wh * torch.log(gt_heights / ex_heights) 48 | 49 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 50 | return targets 51 | 52 | def decode(self, rel_codes, boxes): 53 | """ 54 | From a set of original boxes and encoded relative box offsets, 55 | get the decoded boxes. 56 | 57 | Arguments: 58 | rel_codes (Tensor): encoded boxes 59 | boxes (Tensor): reference boxes. 60 | """ 61 | 62 | boxes = boxes.to(rel_codes.dtype) 63 | 64 | TO_REMOVE = 1 # TODO remove 65 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 66 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 67 | ctr_x = boxes[:, 0] + 0.5 * widths 68 | ctr_y = boxes[:, 1] + 0.5 * heights 69 | 70 | wx, wy, ww, wh = self.weights 71 | dx = rel_codes[:, 0::4] / wx 72 | dy = rel_codes[:, 1::4] / wy 73 | dw = rel_codes[:, 2::4] / ww 74 | dh = rel_codes[:, 3::4] / wh 75 | 76 | # Prevent sending too large values into torch.exp() 77 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 78 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 79 | 80 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 81 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 82 | pred_w = torch.exp(dw) * widths[:, None] 83 | pred_h = torch.exp(dh) * heights[:, None] 84 | 85 | pred_boxes = torch.zeros_like(rel_codes) 86 | # x1 87 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 88 | # y1 89 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 90 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 91 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 92 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 93 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 94 | 95 | return pred_boxes 96 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .detectors import build_detection_model 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/detectors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
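[Illustrative aside, not from the repository sources: an encode/decode round trip with the BoxCoder above; weights (10, 10, 5, 5) are the values commonly used for R-CNN box heads, and the boxes are made up.]

import torch

coder = BoxCoder(weights=(10.0, 10.0, 5.0, 5.0))
proposals = torch.tensor([[0.0, 0.0, 99.0, 99.0]])
gt = torch.tensor([[10.0, 10.0, 89.0, 89.0]])
deltas = coder.encode(gt, proposals)           # (dx, dy, dw, dh), scaled by the weights
decoded = coder.decode(deltas, proposals)
assert torch.allclose(decoded, gt, atol=1e-4)  # the +1/-1 conventions cancel exactly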
2 | from .generalized_rcnn import GeneralizedRCNN 3 | from .retinanet import RetinaNet 4 | 5 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN, 6 | "RetinaNet": RetinaNet} 7 | 8 | 9 | def build_detection_model(cfg): 10 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 11 | return meta_arch(cfg) 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/generalized_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Implements the Generalized R-CNN framework 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | 9 | from maskrcnn_benchmark.structures.image_list import to_image_list 10 | 11 | from ..backbone import build_backbone 12 | from ..rpn.rpn import build_rpn 13 | from ..rpn.retinanet import build_retinanet 14 | from ..roi_heads.roi_heads import build_roi_heads 15 | 16 | 17 | class GeneralizedRCNN(nn.Module): 18 | """ 19 | Main class for Generalized R-CNN. Currently supports boxes and masks. 20 | It consists of three main parts: 21 | - backbone 22 | - rpn 23 | - heads: takes the features + the proposals from the RPN and computes 24 | detections / masks from it. 25 | """ 26 | 27 | def __init__(self, cfg): 28 | super(GeneralizedRCNN, self).__init__() 29 | 30 | self.backbone = build_backbone(cfg) 31 | if not cfg.RETINANET.RETINANET_ON: 32 | self.rpn = build_rpn(cfg) 33 | else: 34 | self.rpn = build_retinanet(cfg) 35 | self.roi_heads = build_roi_heads(cfg) 36 | 37 | def forward(self, images, targets=None): 38 | """ 39 | Arguments: 40 | images (list[Tensor] or ImageList): images to be processed 41 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 42 | 43 | Returns: 44 | result (list[BoxList] or dict[Tensor]): the output from the model. 45 | During training, it returns a dict[Tensor] which contains the losses. 46 | During testing, it returns a list[BoxList] that contains additional fields 47 | like `scores`, `labels` and `mask` (for Mask R-CNN models).
48 | 49 | """ 50 | if self.training and targets is None: 51 | raise ValueError("In training mode, targets should be passed") 52 | images = to_image_list(images) 53 | features = self.backbone(images.tensors) 54 | proposals, proposal_losses = self.rpn(images, features, targets) 55 | if self.roi_heads: 56 | x, result, detector_losses = self.roi_heads(features, proposals, targets) 57 | else: 58 | # RPN-only models don't have roi_heads 59 | x = features 60 | result = proposals 61 | detector_losses = {} 62 | 63 | if self.training: 64 | losses = {} 65 | losses.update(detector_losses) 66 | losses.update(proposal_losses) 67 | return losses 68 | 69 | return result 70 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/maskrcnn_benchmark/modeling/roi_heads/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 6 | from .roi_box_predictors import make_roi_box_predictor 7 | from .inference import make_roi_box_post_processor 8 | from .loss import make_roi_box_loss_evaluator 9 | 10 | 11 | class ROIBoxHead(torch.nn.Module): 12 | """ 13 | Generic Box Head class. 14 | """ 15 | 16 | def __init__(self, cfg): 17 | super(ROIBoxHead, self).__init__() 18 | self.feature_extractor = make_roi_box_feature_extractor(cfg) 19 | self.predictor = make_roi_box_predictor(cfg) 20 | self.post_processor = make_roi_box_post_processor(cfg) 21 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 22 | 23 | def forward(self, features, proposals, targets=None): 24 | """ 25 | Arguments: 26 | features (list[Tensor]): feature-maps from possibly several levels 27 | proposals (list[BoxList]): proposal boxes 28 | targets (list[BoxList], optional): the ground-truth targets. 29 | 30 | Returns: 31 | x (Tensor): the result of the feature extractor 32 | proposals (list[BoxList]): during training, the subsampled proposals 33 | are returned. During testing, the predicted boxlists are returned 34 | losses (dict[Tensor]): During training, returns the losses for the 35 | head. During testing, returns an empty dict. 36 | """ 37 | 38 | if self.training: 39 | # Faster R-CNN subsamples during training the proposals with a fixed 40 | # positive / negative ratio 41 | with torch.no_grad(): 42 | proposals = self.loss_evaluator.subsample(proposals, targets) 43 | 44 | # extract features that will be fed to the final classifier. 
The 45 | # feature_extractor generally corresponds to the pooler + heads 46 | x = self.feature_extractor(features, proposals) 47 | # final classifier that converts the features into predictions 48 | class_logits, box_regression = self.predictor(x) 49 | 50 | if not self.training: 51 | result = self.post_processor((class_logits, box_regression), proposals) 52 | return x, result, {} 53 | 54 | loss_classifier, loss_box_reg = self.loss_evaluator( 55 | [class_logits], [box_regression] 56 | ) 57 | return ( 58 | x, 59 | proposals, 60 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 61 | ) 62 | 63 | 64 | def build_roi_box_head(cfg): 65 | """ 66 | Constructs a new box head. 67 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 68 | and make it a parameter in the config 69 | """ 70 | return ROIBoxHead(cfg) 71 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.modeling.backbone import resnet 6 | from maskrcnn_benchmark.modeling.poolers import Pooler 7 | 8 | 9 | class ResNet50Conv5ROIFeatureExtractor(nn.Module): 10 | def __init__(self, config): 11 | super(ResNet50Conv5ROIFeatureExtractor, self).__init__() 12 | 13 | resolution = config.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 14 | scales = config.MODEL.ROI_BOX_HEAD.POOLER_SCALES 15 | sampling_ratio = config.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 16 | pooler = Pooler( 17 | output_size=(resolution, resolution), 18 | scales=scales, 19 | sampling_ratio=sampling_ratio, 20 | ) 21 | 22 | stage = resnet.StageSpec(index=4, block_count=3, return_features=False) 23 | head = resnet.ResNetHead( 24 | block_module=config.MODEL.RESNETS.TRANS_FUNC, 25 | stages=(stage,), 26 | num_groups=config.MODEL.RESNETS.NUM_GROUPS, 27 | width_per_group=config.MODEL.RESNETS.WIDTH_PER_GROUP, 28 | stride_in_1x1=config.MODEL.RESNETS.STRIDE_IN_1X1, 29 | stride_init=None, 30 | res2_out_channels=config.MODEL.RESNETS.RES2_OUT_CHANNELS, 31 | ) 32 | 33 | self.pooler = pooler 34 | self.head = head 35 | 36 | def forward(self, x, proposals): 37 | x = self.pooler(x, proposals) 38 | x = self.head(x) 39 | return x 40 | 41 | 42 | class FPN2MLPFeatureExtractor(nn.Module): 43 | """ 44 | Heads for FPN for classification 45 | """ 46 | 47 | def __init__(self, cfg): 48 | super(FPN2MLPFeatureExtractor, self).__init__() 49 | 50 | resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION 51 | scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES 52 | sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 53 | pooler = Pooler( 54 | output_size=(resolution, resolution), 55 | scales=scales, 56 | sampling_ratio=sampling_ratio, 57 | ) 58 | input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS * resolution ** 2 59 | representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM 60 | self.pooler = pooler 61 | self.fc6 = nn.Linear(input_size, representation_size) 62 | self.fc7 = nn.Linear(representation_size, representation_size) 63 | 64 | for l in [self.fc6, self.fc7]: 65 | # Caffe2 implementation uses XavierFill, which in fact 66 | # corresponds to kaiming_uniform_ in PyTorch 67 | nn.init.kaiming_uniform_(l.weight, a=1) 68 | nn.init.constant_(l.bias, 0) 69 | 70 | def forward(self, x, proposals): 71 | x = 
self.pooler(x, proposals) 72 | x = x.view(x.size(0), -1) 73 | 74 | x = F.relu(self.fc6(x)) 75 | x = F.relu(self.fc7(x)) 76 | 77 | return x 78 | 79 | 80 | _ROI_BOX_FEATURE_EXTRACTORS = { 81 | "ResNet50Conv5ROIFeatureExtractor": ResNet50Conv5ROIFeatureExtractor, 82 | "FPN2MLPFeatureExtractor": FPN2MLPFeatureExtractor, 83 | } 84 | 85 | 86 | def make_roi_box_feature_extractor(cfg): 87 | func = _ROI_BOX_FEATURE_EXTRACTORS[cfg.MODEL.ROI_BOX_HEAD.FEATURE_EXTRACTOR] 88 | return func(cfg) 89 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | 4 | 5 | class FastRCNNPredictor(nn.Module): 6 | def __init__(self, config, pretrained=None): 7 | super(FastRCNNPredictor, self).__init__() 8 | 9 | stage_index = 4 10 | stage2_relative_factor = 2 ** (stage_index - 1) 11 | res2_out_channels = config.MODEL.RESNETS.RES2_OUT_CHANNELS 12 | num_inputs = res2_out_channels * stage2_relative_factor 13 | 14 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7) 16 | self.cls_score = nn.Linear(num_inputs, num_classes) 17 | self.bbox_pred = nn.Linear(num_inputs, num_classes * 4) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | cls_logit = self.cls_score(x) 29 | bbox_pred = self.bbox_pred(x) 30 | return cls_logit, bbox_pred 31 | 32 | 33 | class FPNPredictor(nn.Module): 34 | def __init__(self, cfg): 35 | super(FPNPredictor, self).__init__() 36 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 37 | representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM 38 | 39 | self.cls_score = nn.Linear(representation_size, num_classes) 40 | self.bbox_pred = nn.Linear(representation_size, num_classes * 4) 41 | 42 | nn.init.normal_(self.cls_score.weight, std=0.01) 43 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 44 | for l in [self.cls_score, self.bbox_pred]: 45 | nn.init.constant_(l.bias, 0) 46 | 47 | def forward(self, x): 48 | scores = self.cls_score(x) 49 | bbox_deltas = self.bbox_pred(x) 50 | 51 | return scores, bbox_deltas 52 | 53 | 54 | _ROI_BOX_PREDICTOR = { 55 | "FastRCNNPredictor": FastRCNNPredictor, 56 | "FPNPredictor": FPNPredictor, 57 | } 58 | 59 | 60 | def make_roi_box_predictor(cfg): 61 | func = _ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 62 | return func(cfg) 63 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
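keep_only_positive_boxes below keeps, per image, only the proposals whose `labels` field is positive, since mask targets exist only for foreground boxes. A minimal standalone sketch of that filtering on plain tensors instead of BoxList objects (the box and label values here are made up):

import torch

# hypothetical per-image proposals: four boxes and their matched class labels,
# where label 0 denotes background
boxes = torch.tensor([[0., 0., 10., 10.],
                      [5., 5., 20., 20.],
                      [1., 2., 3., 4.],
                      [8., 8., 30., 30.]])
labels = torch.tensor([0, 3, 0, 7])

inds_mask = labels > 0                 # boolean mask over the proposals
inds = inds_mask.nonzero().squeeze(1)  # -> tensor([1, 3])
positive_boxes = boxes[inds]           # only the two foreground boxes survive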
2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | 7 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 8 | from .roi_mask_predictors import make_roi_mask_predictor 9 | from .inference import make_roi_mask_post_processor 10 | from .loss import make_roi_mask_loss_evaluator 11 | 12 | 13 | def keep_only_positive_boxes(boxes): 14 | """ 15 | Given a set of BoxList containing the `labels` field, 16 | return a set of BoxList for which `labels > 0`. 17 | 18 | Arguments: 19 | boxes (list of BoxList) 20 | """ 21 | assert isinstance(boxes, (list, tuple)) 22 | assert isinstance(boxes[0], BoxList) 23 | assert boxes[0].has_field("labels") 24 | positive_boxes = [] 25 | positive_inds = [] 26 | num_boxes = 0 27 | for boxes_per_image in boxes: 28 | labels = boxes_per_image.get_field("labels") 29 | inds_mask = labels > 0 30 | inds = inds_mask.nonzero().squeeze(1) 31 | positive_boxes.append(boxes_per_image[inds]) 32 | positive_inds.append(inds_mask) 33 | return positive_boxes, positive_inds 34 | 35 | 36 | class ROIMaskHead(torch.nn.Module): 37 | def __init__(self, cfg): 38 | super(ROIMaskHead, self).__init__() 39 | self.cfg = cfg.clone() 40 | self.feature_extractor = make_roi_mask_feature_extractor(cfg) 41 | self.predictor = make_roi_mask_predictor(cfg) 42 | self.post_processor = make_roi_mask_post_processor(cfg) 43 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 44 | 45 | def forward(self, features, proposals, targets=None): 46 | """ 47 | Arguments: 48 | features (list[Tensor]): feature-maps from possibly several levels 49 | proposals (list[BoxList]): proposal boxes 50 | targets (list[BoxList], optional): the ground-truth targets. 51 | 52 | Returns: 53 | x (Tensor): the result of the feature extractor 54 | proposals (list[BoxList]): during training, the original proposals 55 | are returned. During testing, the predicted boxlists are returned 56 | with the `mask` field set 57 | losses (dict[Tensor]): During training, returns the losses for the 58 | head. During testing, returns an empty dict. 59 | """ 60 | 61 | if self.training: 62 | # during training, only focus on positive boxes 63 | all_proposals = proposals 64 | proposals, positive_inds = keep_only_positive_boxes(proposals) 65 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 66 | x = features 67 | x = x[torch.cat(positive_inds, dim=0)] 68 | else: 69 | x = self.feature_extractor(features, proposals) 70 | mask_logits = self.predictor(x) 71 | 72 | if not self.training: 73 | result = self.post_processor(mask_logits, proposals) 74 | return x, result, {} 75 | 76 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 77 | 78 | return x, all_proposals, dict(loss_mask=loss_mask) 79 | 80 | 81 | def build_roi_mask_head(cfg): 82 | return ROIMaskHead(cfg) 83 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
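MaskRCNNFPNFeatureExtractor below builds its conv tower dynamically: the layer widths come from cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS, each conv is registered with add_module so its parameters are tracked, and forward walks the stored names with getattr. A stripped-down sketch of the same registration pattern (ConvTower and its widths are hypothetical, not part of this codebase):

import torch
from torch import nn
from torch.nn import functional as F

class ConvTower(nn.Module):
    def __init__(self, in_channels=256, widths=(256, 256)):
        super(ConvTower, self).__init__()
        self.blocks = []
        next_feature = in_channels
        for idx, width in enumerate(widths, 1):
            name = "conv{}".format(idx)
            # add_module registers the layer under a computed name, so
            # .parameters() and .state_dict() still see it
            self.add_module(name, nn.Conv2d(next_feature, width, 3, stride=1, padding=1))
            self.blocks.append(name)
            next_feature = width

    def forward(self, x):
        for name in self.blocks:
            x = F.relu(getattr(self, name)(x))
        return x

out = ConvTower()(torch.randn(2, 256, 14, 14))  # -> shape (2, 256, 14, 14)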
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 6 | from maskrcnn_benchmark.modeling.poolers import Pooler 7 | from maskrcnn_benchmark.layers import Conv2d 8 | 9 | 10 | class MaskRCNNFPNFeatureExtractor(nn.Module): 11 | """ 12 | Feature extractor for the FPN mask head: an ROI pooler followed by a stack of 3x3 convolutions 13 | """ 14 | 15 | def __init__(self, cfg): 16 | """ 17 | Arguments: 18 | cfg: the configuration node; the pooler settings and the conv 19 | layer widths are read from cfg.MODEL.ROI_MASK_HEAD 20 | (POOLER_*, CONV_LAYERS) 21 | """ 22 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 23 | 24 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 25 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 26 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 27 | pooler = Pooler( 28 | output_size=(resolution, resolution), 29 | scales=scales, 30 | sampling_ratio=sampling_ratio, 31 | canonical_level=cfg.MODEL.ROI_MASK_HEAD.CANONICAL_LEVEL, 32 | ) 33 | input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS 34 | self.pooler = pooler 35 | 36 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 37 | 38 | next_feature = input_size 39 | self.blocks = [] 40 | for layer_idx, layer_features in enumerate(layers, 1): 41 | layer_name = "mask_fcn{}".format(layer_idx) 42 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 43 | # Caffe2 implementation uses MSRAFill, which in fact 44 | # corresponds to kaiming_normal_ in PyTorch 45 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 46 | nn.init.constant_(module.bias, 0) 47 | self.add_module(layer_name, module) 48 | next_feature = layer_features 49 | self.blocks.append(layer_name) 50 | 51 | def forward(self, x, proposals): 52 | x = self.pooler(x, proposals) 53 | 54 | for layer_name in self.blocks: 55 | x = F.relu(getattr(self, layer_name)(x)) 56 | 57 | return x 58 | 59 | 60 | _ROI_MASK_FEATURE_EXTRACTORS = { 61 | "ResNet50Conv5ROIFeatureExtractor": ResNet50Conv5ROIFeatureExtractor, 62 | "MaskRCNNFPNFeatureExtractor": MaskRCNNFPNFeatureExtractor, 63 | } 64 | 65 | 66 | def make_roi_mask_feature_extractor(cfg): 67 | func = _ROI_MASK_FEATURE_EXTRACTORS[cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR] 68 | return func(cfg) 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
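MaskRCNNC4Predictor below turns the extractor output into per-class mask logits: a single 2x transposed-convolution upsample followed by a 1x1 conv with one output channel per class. A shape-only sketch, assuming the common values of 256 feature channels, 81 COCO classes and 14x14 pooled ROIs (these numbers come from typical configs, not from this file):

import torch
from torch import nn

upsample = nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0)
mask_logits = nn.Conv2d(256, 81, kernel_size=1, stride=1, padding=0)

x = torch.randn(8, 256, 14, 14)  # 8 ROIs worth of pooled features
x = torch.relu(upsample(x))      # -> (8, 256, 28, 28): 2x spatial upsampling
logits = mask_logits(x)          # -> (8, 81, 28, 28): one mask map per class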
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | from maskrcnn_benchmark.layers import ConvTranspose2d 7 | 8 | 9 | class MaskRCNNC4Predictor(nn.Module): 10 | def __init__(self, cfg): 11 | super(MaskRCNNC4Predictor, self).__init__() 12 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 13 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 14 | 15 | if cfg.MODEL.ROI_HEADS.USE_FPN: 16 | num_inputs = dim_reduced 17 | else: 18 | stage_index = 4 19 | stage2_relative_factor = 2 ** (stage_index - 1) 20 | res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 21 | num_inputs = res2_out_channels * stage2_relative_factor 22 | 23 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 24 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 25 | 26 | for name, param in self.named_parameters(): 27 | if "bias" in name: 28 | nn.init.constant_(param, 0) 29 | elif "weight" in name: 30 | # Caffe2 implementation uses MSRAFill, which in fact 31 | # corresponds to kaiming_normal_ in PyTorch 32 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 33 | 34 | def forward(self, x): 35 | x = F.relu(self.conv5_mask(x)) 36 | return self.mask_fcn_logits(x) 37 | 38 | 39 | _ROI_MASK_PREDICTOR = {"MaskRCNNC4Predictor": MaskRCNNC4Predictor} 40 | 41 | 42 | def make_roi_mask_predictor(cfg): 43 | func = _ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 44 | return func(cfg) 45 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/roi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .box_head.box_head import build_roi_box_head 5 | from .mask_head.mask_head import build_roi_mask_head 6 | 7 | 8 | class CombinedROIHeads(torch.nn.ModuleDict): 9 | """ 10 | Combines a set of individual heads (for box prediction or masks) into a single 11 | head. 
12 | """ 13 | 14 | def __init__(self, cfg, heads): 15 | super(CombinedROIHeads, self).__init__(heads) 16 | self.cfg = cfg.clone() 17 | if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 18 | self.mask.feature_extractor = self.box.feature_extractor 19 | 20 | def forward(self, features, proposals, targets=None): 21 | losses = {} 22 | # TODO rename x to roi_box_features, if it doesn't increase memory consumption 23 | x, detections, loss_box = self.box(features, proposals, targets) 24 | losses.update(loss_box) 25 | if self.cfg.MODEL.MASK_ON: 26 | mask_features = features 27 | # optimization: during training, if we share the feature extractor between 28 | # the box and the mask heads, then we can reuse the features already computed 29 | if ( 30 | self.training 31 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 32 | ): 33 | mask_features = x 34 | # During training, self.box() returns the unaltered proposals as "detections"; 35 | # this keeps the API consistent between training and testing 36 | x, detections, loss_mask = self.mask(mask_features, detections, targets) 37 | losses.update(loss_mask) 38 | return x, detections, losses 39 | 40 | 41 | def build_roi_heads(cfg): 42 | # individually create the heads that will be combined together 43 | # afterwards 44 | roi_heads = [] 45 | if not cfg.MODEL.RPN_ONLY: 46 | roi_heads.append(("box", build_roi_box_head(cfg))) 47 | if cfg.MODEL.MASK_ON: 48 | roi_heads.append(("mask", build_roi_mask_head(cfg))) 49 | 50 | # combine individual heads in a single module 51 | if roi_heads: 52 | roi_heads = CombinedROIHeads(cfg, roi_heads) 53 | 54 | return roi_heads 55 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_optimizer 3 | from .build import make_lr_scheduler 4 | from .lr_scheduler import WarmupMultiStepLR 5 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
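make_optimizer below hands SGD one parameter group per tensor, so biases can get a scaled learning rate (SOLVER.BIAS_LR_FACTOR) and their own weight decay (SOLVER.WEIGHT_DECAY_BIAS). A minimal sketch of the same grouping on a toy module, with illustrative values hard-coded in place of the config:

import torch
from torch import nn

model = nn.Conv2d(3, 8, 3)                # toy module with one weight and one bias
base_lr, weight_decay = 0.0025, 0.0001    # illustrative values, not the repo defaults
bias_lr_factor, weight_decay_bias = 2, 0  # bias-specific overrides

params = []
for key, value in model.named_parameters():
    lr, wd = base_lr, weight_decay
    if "bias" in key:
        lr, wd = base_lr * bias_lr_factor, weight_decay_bias
    # one group per tensor, each carrying its own lr / weight decay
    params.append({"params": [value], "lr": lr, "weight_decay": wd})

optimizer = torch.optim.SGD(params, base_lr, momentum=0.9)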
2 | import torch 3 | 4 | from .lr_scheduler import WarmupMultiStepLR 5 | 6 | 7 | def make_optimizer(cfg, model): 8 | params = [] 9 | for key, value in model.named_parameters(): 10 | if not value.requires_grad: 11 | continue 12 | lr = cfg.SOLVER.BASE_LR 13 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 14 | if "bias" in key: 15 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 16 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 17 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 18 | 19 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM) 20 | return optimizer 21 | 22 | 23 | def make_lr_scheduler(cfg, optimizer): 24 | return WarmupMultiStepLR( 25 | optimizer, 26 | cfg.SOLVER.STEPS, 27 | cfg.SOLVER.GAMMA, 28 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 29 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 30 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 31 | ) 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | 4 | import torch 5 | 6 | 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 8 | # separating MultiStepLR from WarmupLR 9 | # but the current LRScheduler design doesn't allow it 10 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer, 14 | milestones, 15 | gamma=0.1, 16 | warmup_factor=1.0 / 3, 17 | warmup_iters=500, 18 | warmup_method="linear", 19 | last_epoch=-1, 20 | ): 21 | if not list(milestones) == sorted(milestones): 22 | raise ValueError( 23 | "Milestones should be a list of increasing integers. " 24 | "Got {}".format(milestones) 25 | ) 26 | 27 | if warmup_method not in ("constant", "linear"): 28 | raise ValueError( 29 | "Only 'constant' or 'linear' warmup_method accepted, " 30 | "got {}".format(warmup_method) 31 | ) 32 | self.milestones = milestones 33 | self.gamma = gamma 34 | self.warmup_factor = warmup_factor 35 | self.warmup_iters = warmup_iters 36 | self.warmup_method = warmup_method 37 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 38 | 39 | def get_lr(self): 40 | warmup_factor = 1 41 | if self.last_epoch < self.warmup_iters: 42 | if self.warmup_method == "constant": 43 | warmup_factor = self.warmup_factor 44 | elif self.warmup_method == "linear": 45 | alpha = self.last_epoch / self.warmup_iters 46 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 47 | return [ 48 | base_lr 49 | * warmup_factor 50 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 51 | for base_lr in self.base_lrs 52 | ] 53 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/maskrcnn_benchmark/structures/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/boxlist_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
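To make the schedule implemented by WarmupMultiStepLR above concrete, the snippet below re-implements the multiplier computed in get_lr for a few sample iterations, using the constructor defaults (warmup_factor=1/3, warmup_iters=500, linear warmup, gamma=0.1) and hypothetical milestones at 60k and 80k iterations:

from bisect import bisect_right

def lr_multiplier(it, milestones=(60000, 80000), gamma=0.1,
                  warmup_factor=1.0 / 3, warmup_iters=500):
    factor = 1.0
    if it < warmup_iters:
        alpha = it / warmup_iters                     # linear ramp from 1/3 up to 1
        factor = warmup_factor * (1 - alpha) + alpha
    return factor * gamma ** bisect_right(milestones, it)

print(lr_multiplier(0))      # 0.333...  at the first iteration
print(lr_multiplier(250))    # 0.666...  halfway through warmup
print(lr_multiplier(10000))  # 1.0       after warmup
print(lr_multiplier(70000))  # 0.1       past the first milestone
print(lr_multiplier(85000))  # 0.01      past the second milestone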
2 | import torch 3 | 4 | from .bounding_box import BoxList 5 | 6 | from maskrcnn_benchmark.layers import nms as _box_nms 7 | 8 | 9 | def boxlist_nms(boxlist, nms_thresh, max_proposals=-1, score_field="score"): 10 | """ 11 | Performs non-maximum suppression on a boxlist, with scores specified 12 | in a boxlist field via score_field. 13 | 14 | Arguments: 15 | boxlist (BoxList) 16 | nms_thresh (float) 17 | max_proposals (int): if > 0, then only the top max_proposals are kept 18 | after non-maximum suppression 19 | score_field (str) 20 | """ 21 | if nms_thresh <= 0: 22 | return boxlist 23 | mode = boxlist.mode 24 | boxlist = boxlist.convert("xyxy") 25 | boxes = boxlist.bbox 26 | score = boxlist.get_field(score_field) 27 | keep = _box_nms(boxes, score, nms_thresh) 28 | if max_proposals > 0: 29 | keep = keep[: max_proposals] 30 | boxlist = boxlist[keep] 31 | return boxlist.convert(mode) 32 | 33 | 34 | def remove_small_boxes(boxlist, min_size): 35 | """ 36 | Only keep boxes with both sides >= min_size 37 | 38 | Arguments: 39 | boxlist (BoxList) 40 | min_size (int) 41 | """ 42 | # TODO maybe add an API for querying the ws / hs 43 | xywh_boxes = boxlist.convert("xywh").bbox 44 | _, _, ws, hs = xywh_boxes.unbind(dim=1) 45 | keep = ( 46 | (ws >= min_size) & (hs >= min_size) 47 | ).nonzero().squeeze(1) 48 | return boxlist[keep] 49 | 50 | 51 | # implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py 52 | # with slight modifications 53 | def boxlist_iou(boxlist1, boxlist2): 54 | """Compute the intersection over union of two sets of boxes. 55 | The box order must be (xmin, ymin, xmax, ymax). 56 | 57 | Arguments: 58 | boxlist1 (BoxList): bounding boxes, sized [N,4]. 59 | boxlist2 (BoxList): bounding boxes, sized [M,4]. 60 | 61 | Returns: 62 | (tensor) iou, sized [N,M].
63 | 64 | Reference: 65 | https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py 66 | """ 67 | if boxlist1.size != boxlist2.size: 68 | raise RuntimeError( 69 | "boxlists should have same image size, got {}, {}".format(boxlist1, boxlist2)) 70 | 71 | N = len(boxlist1) 72 | M = len(boxlist2) 73 | 74 | area1 = boxlist1.area() 75 | area2 = boxlist2.area() 76 | 77 | box1, box2 = boxlist1.bbox, boxlist2.bbox 78 | 79 | lt = torch.max(box1[:, None, :2], box2[:, :2]) # [N,M,2] 80 | rb = torch.min(box1[:, None, 2:], box2[:, 2:]) # [N,M,2] 81 | 82 | TO_REMOVE = 1 83 | 84 | wh = (rb - lt + TO_REMOVE).clamp(min=0) # [N,M,2] 85 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 86 | 87 | iou = inter / (area1[:, None] + area2 - inter) 88 | return iou 89 | 90 | 91 | # TODO redundant, remove 92 | def _cat(tensors, dim=0): 93 | """ 94 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 95 | """ 96 | assert isinstance(tensors, (list, tuple)) 97 | if len(tensors) == 1: 98 | return tensors[0] 99 | return torch.cat(tensors, dim) 100 | 101 | 102 | def cat_boxlist(bboxes): 103 | """ 104 | Concatenates a list of BoxList (having the same image size) into a 105 | single BoxList 106 | 107 | Arguments: 108 | bboxes (list[BoxList]) 109 | """ 110 | assert isinstance(bboxes, (list, tuple)) 111 | assert all(isinstance(bbox, BoxList) for bbox in bboxes) 112 | 113 | size = bboxes[0].size 114 | assert all(bbox.size == size for bbox in bboxes) 115 | 116 | mode = bboxes[0].mode 117 | assert all(bbox.mode == mode for bbox in bboxes) 118 | 119 | fields = set(bboxes[0].fields()) 120 | assert all(set(bbox.fields()) == fields for bbox in bboxes) 121 | 122 | cat_boxes = BoxList(_cat([bbox.bbox for bbox in bboxes], dim=0), size, mode) 123 | 124 | for field in fields: 125 | data = _cat([bbox.get_field(field) for bbox in bboxes], dim=0) 126 | cat_boxes.add_field(field, data) 127 | 128 | return cat_boxes 129 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 
33 | When tensors is an iterable of Tensors, it pads 34 | the Tensors with zeros so that they have the same 35 | shape 36 | """ 37 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 38 | tensors = [tensors] 39 | 40 | if isinstance(tensors, ImageList): 41 | return tensors 42 | elif isinstance(tensors, torch.Tensor): 43 | # single tensor shape can be inferred 44 | assert tensors.dim() == 4 45 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 46 | return ImageList(tensors, image_sizes) 47 | elif isinstance(tensors, (tuple, list)): 48 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 49 | 50 | # TODO Ideally, just remove this and let the model handle arbitrary 51 | # input sizes 52 | if size_divisible > 0: 53 | import math 54 | 55 | stride = size_divisible 56 | max_size = list(max_size) 57 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 58 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 59 | max_size = tuple(max_size) 60 | 61 | batch_shape = (len(tensors),) + max_size 62 | batched_imgs = tensors[0].new(*batch_shape).zero_() 63 | for img, pad_img in zip(tensors, batched_imgs): 64 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 65 | 66 | image_sizes = [im.shape[-2:] for im in tensors] 67 | 68 | return ImageList(batched_imgs, image_sizes) 69 | else: 70 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 71 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengyangfu/retinamask/5bcf2e744c6d9b4574682975dde0d4c24ff4cc59/maskrcnn_benchmark/utils/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import PIL 3 | 4 | from torch.utils.collect_env import get_pretty_env_info 5 | 6 | 7 | def get_pil_version(): 8 | return "\n        Pillow ({})".format(PIL.__version__) 9 | 10 | 11 | def collect_env_info(): 12 | env_str = get_pretty_env_info() 13 | env_str += get_pil_version() 14 | return env_str 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | 4 | from maskrcnn_benchmark.utils.imports import import_file 5 | 6 | 7 | def setup_environment(): 8 | """Perform environment setup work. The default setup is a no-op, but this 9 | function allows the user to specify a Python source file that performs 10 | custom setup work that may be necessary for their computing environment.
11 | """ 12 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 13 | if custom_module_path: 14 | setup_custom_environment(custom_module_path) 15 | else: 16 | # The default setup is a no-op 17 | pass 18 | 19 | 20 | def setup_custom_environment(custom_module_path): 21 | """Load custom environment setup from a Python source file and run the setup 22 | function. 23 | """ 24 | module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) 25 | assert hasattr(module, "setup_environment") and callable( 26 | module.setup_environment 27 | ), ( 28 | "Custom environment module defined in {} does not have the " 29 | "required callable attribute 'setup_environment'." 30 | ).format( 31 | custom_module_path 32 | ) 33 | module.setup_environment() 34 | 35 | 36 | # Force environment setup when this module is imported 37 | setup_environment() 38 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/imports.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | if torch._six.PY3: 5 | import importlib 6 | import importlib.util 7 | import sys 8 | 9 | 10 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 11 | def import_file(module_name, file_path, make_importable=False): 12 | spec = importlib.util.spec_from_file_location(module_name, file_path) 13 | module = importlib.util.module_from_spec(spec) 14 | spec.loader.exec_module(module) 15 | if make_importable: 16 | sys.modules[module_name] = module 17 | return module 18 | else: 19 | import imp 20 | 21 | def import_file(module_name, file_path, make_importable=None): 22 | module = imp.load_source(module_name, file_path) 23 | return module 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | 7 | def setup_logger(name, save_dir, distributed_rank): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | ch = logging.StreamHandler(stream=sys.stdout) 14 | ch.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 16 | ch.setFormatter(formatter) 17 | logger.addHandler(ch) 18 | 19 | if save_dir: 20 | fh = logging.FileHandler(os.path.join(save_dir, "log.txt")) 21 | fh.setLevel(logging.DEBUG) 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | 25 | return logger 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | 7 | 8 | class SmoothedValue(object): 9 | """Track a series of values and provide access to smoothed values over a 10 | window or the global series average. 
11 | """ 12 | 13 | def __init__(self, window_size=20): 14 | self.deque = deque(maxlen=window_size) 15 | self.series = [] 16 | self.total = 0.0 17 | self.count = 0 18 | 19 | def update(self, value): 20 | self.deque.append(value) 21 | self.series.append(value) 22 | self.count += 1 23 | self.total += value 24 | 25 | @property 26 | def median(self): 27 | d = torch.tensor(list(self.deque)) 28 | return d.median().item() 29 | 30 | @property 31 | def avg(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.mean().item() 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger(object): 41 | def __init__(self, delimiter="\t"): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | return object.__getattribute__(self, attr) 56 | 57 | def __str__(self): 58 | loss_str = [] 59 | for name, meter in self.meters.items(): 60 | loss_str.append( 61 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 62 | ) 63 | return self.delimiter.join(loss_str) 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import os 4 | 5 | 6 | def mkdir(path): 7 | try: 8 | os.makedirs(path) 9 | except OSError as e: 10 | if e.errno != errno.EEXIST: 11 | raise 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_serialization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | import logging 4 | 5 | import torch 6 | 7 | from maskrcnn_benchmark.utils.imports import import_file 8 | 9 | 10 | def align_and_update_state_dicts(model_state_dict, loaded_state_dict): 11 | """ 12 | Strategy: suppose that the models that we will create will have prefixes appended 13 | to each of their keys, for example due to an extra level of nesting that the original 14 | pre-trained weights from ImageNet won't contain. For example, model.state_dict() 15 | might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains 16 | res2.conv1.weight. We thus want to match both parameters together. 17 | For that, for each model weight we look among all the loaded keys for one 18 | that is a suffix of the current weight name, and use it if that's the case. 19 | If multiple matches exist, take the one with the longest 20 | matching name. For example, for the same model as before, the pretrained 21 | weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, 22 | we want to match backbone[0].body.conv1.weight to conv1.weight, and 23 | backbone[0].body.res2.conv1.weight to res2.conv1.weight.
24 | """ 25 | current_keys = sorted(list(model_state_dict.keys())) 26 | loaded_keys = sorted(list(loaded_state_dict.keys())) 27 | # get a matrix of string matches, where each (i, j) entry corresponds to the size of the 28 | # loaded_key string, if it matches 29 | match_matrix = [ 30 | len(j) if i.endswith(j) else 0 for i in current_keys for j in loaded_keys 31 | ] 32 | match_matrix = torch.as_tensor(match_matrix).view( 33 | len(current_keys), len(loaded_keys) 34 | ) 35 | max_match_size, idxs = match_matrix.max(1) 36 | # remove indices that correspond to no-match 37 | idxs[max_match_size == 0] = -1 38 | 39 | # used for logging 40 | max_size = max([len(key) for key in current_keys]) if current_keys else 1 41 | max_size_loaded = max([len(key) for key in loaded_keys]) if loaded_keys else 1 42 | log_str_template = "{: <{}} loaded from {: <{}} of shape {}" 43 | logger = logging.getLogger(__name__) 44 | for idx_new, idx_old in enumerate(idxs.tolist()): 45 | if idx_old == -1: 46 | continue 47 | key = current_keys[idx_new] 48 | key_old = loaded_keys[idx_old] 49 | model_state_dict[key] = loaded_state_dict[key_old] 50 | logger.info( 51 | log_str_template.format( 52 | key, 53 | max_size, 54 | key_old, 55 | max_size_loaded, 56 | tuple(loaded_state_dict[key_old].shape), 57 | ) 58 | ) 59 | 60 | 61 | def strip_prefix_if_present(state_dict, prefix): 62 | keys = sorted(state_dict.keys()) 63 | if not all(key.startswith(prefix) for key in keys): 64 | return state_dict 65 | stripped_state_dict = OrderedDict() 66 | for key, value in state_dict.items(): 67 | stripped_state_dict[key.replace(prefix, "")] = value 68 | return stripped_state_dict 69 | 70 | 71 | def load_state_dict(model, loaded_state_dict): 72 | model_state_dict = model.state_dict() 73 | # if the state_dict comes from a model that was wrapped in a 74 | # DataParallel or DistributedDataParallel during serialization, 75 | # remove the "module" prefix before performing the matching 76 | loaded_state_dict = strip_prefix_if_present(loaded_state_dict, prefix="module.") 77 | align_and_update_state_dicts(model_state_dict, loaded_state_dict) 78 | 79 | # use strict loading 80 | model.load_state_dict(model_state_dict) 81 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | import sys 4 | 5 | from torch.utils.model_zoo import _download_url_to_file 6 | from torch.utils.model_zoo import urlparse 7 | from torch.utils.model_zoo import HASH_REGEX 8 | 9 | from maskrcnn_benchmark.utils.comm import is_main_process 10 | from maskrcnn_benchmark.utils.comm import synchronize 11 | 12 | 13 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py 14 | # but with a few improvements and modifications 15 | def cache_url(url, model_dir=None, progress=True): 16 | r"""Downloads and caches the Torch serialized object at the given URL. 17 | If the object is already present in `model_dir`, the path to the cached 18 | file is returned without re-downloading. The filename part of the URL should follow the naming convention 19 | ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more 20 | digits of the SHA256 hash of the contents of the file. The hash is used to 21 | ensure unique names and to verify the contents of the file. 22 | The default value of `model_dir` is ``$TORCH_HOME/models`` where 23 | ``$TORCH_HOME`` defaults to ``~/.torch``.
The default directory can be 24 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 25 | Args: 26 | url (string): URL of the object to download 27 | model_dir (string, optional): directory in which to save the object 28 | progress (bool, optional): whether or not to display a progress bar to stderr 29 | Example: 30 | >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') 31 | """ 32 | if model_dir is None: 33 | torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch')) 34 | model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models')) 35 | if not os.path.exists(model_dir): 36 | os.makedirs(model_dir) 37 | parts = urlparse(url) 38 | filename = os.path.basename(parts.path) 39 | if filename == "model_final.pkl": 40 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename 41 | # so make the full path the filename by replacing / with _ 42 | filename = parts.path.replace("/", "_") 43 | cached_file = os.path.join(model_dir, filename) 44 | if not os.path.exists(cached_file) and is_main_process(): 45 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 46 | hash_prefix = HASH_REGEX.search(filename) 47 | if hash_prefix is not None: 48 | hash_prefix = hash_prefix.group(1) 49 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, 50 | # which matches the hash PyTorch uses. So we skip the hash matching 51 | # if the hash_prefix is less than 6 characters 52 | if len(hash_prefix) < 6: 53 | hash_prefix = None 54 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 55 | synchronize() 56 | return cached_file 57 | -------------------------------------------------------------------------------- /run_test_R-50.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | MODEL=$1 3 | 4 | for ITER in 0089999 0080001 0070001 0060001 0050001 0040001 0030001 0020001 0010001 5 | do 6 | python tools/test_net.py --config-file ./configs/retina/${MODEL}.yaml MODEL.WEIGHT ./models/${MODEL}/model_${ITER}.pth OUTPUT_DIR ./models/${MODEL}/${ITER} TEST.IMS_PER_BATCH 4 7 | done 8 | 9 | #for ITER in 89999 10 | #do 11 | # python tools/test_net.py --config-file ./configs/retina/retinanet_R-50-FPN_1x.yaml MODEL.WEIGHT ./models/retinanet_R-50-FPN_1x_1101/model_00${ITER}.pth OUTPUT_DIR ./models/retinanet_R-50-FPN_1x_1101/${ITER} TEST.IMS_PER_BATCH 1 12 | #done 13 | -------------------------------------------------------------------------------- /run_test_R-50_dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | MODEL=$1 3 | 4 | for ITER in 0089999 0080001 0070001 0060001 0050001 0040001 0030001 0020001 0010001 5 | do 6 | python -m torch.distributed.launch --nproc_per_node=$NGPUS tools/test_net.py --config-file ./configs/retina/${MODEL}.yaml MODEL.WEIGHT ./models/${MODEL}/model_${ITER}.pth OUTPUT_DIR ./models/${MODEL}/${ITER} TEST.IMS_PER_BATCH 4 7 | done 8 | 9 | #for ITER in 89999 10 | #do 11 | # python tools/test_net.py --config-file ./configs/retina/retinanet_R-50-FPN_1x.yaml MODEL.WEIGHT ./models/retinanet_R-50-FPN_1x_1101/model_00${ITER}.pth OUTPUT_DIR ./models/retinanet_R-50-FPN_1x_1101/${ITER} TEST.IMS_PER_BATCH 1 12 | #done 13 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 
Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if torch.cuda.is_available() and CUDA_HOME is not None: 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "maskrcnn_benchmark._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="maskrcnn_benchmark", 61 | version="0.1", 62 | author="fmassa", 63 | url="https://github.com/facebookresearch/maskrcnn-benchmark", 64 | description="object detection in pytorch", 65 | packages=find_packages(exclude=("configs", "tests",)), 66 | # install_requires=requirements, 67 | ext_modules=get_extensions(), 68 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 69 | ) 70 | -------------------------------------------------------------------------------- /tools/parse_log.py: -------------------------------------------------------------------------------- 1 | import re 2 | import argparse 3 | import numpy as np 4 | 5 | def parse(log_path): 6 | with open(log_path) as f: 7 | text = f.read() 8 | 9 | float_pattern = r'\d+\.\d+' 10 | mean_pattern = r'AdjustSmoothL1\(mean\): ({}), ({}), ({}), ({})'.format( 11 | float_pattern, float_pattern, float_pattern, float_pattern) 12 | var_pattern = r'AdjustSmoothL1\(var\): ({}), ({}), ({}), ({})'.format( 13 | float_pattern, float_pattern, float_pattern, float_pattern) 14 | pattern = mean_pattern + r'.*\n.*' + var_pattern + r'.*\n.*' + \ 15 | r'iter: (\d+) ' + \ 16 | r'loss: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ 17 | r'loss_retina_cls: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ 18 | r'loss_retina_reg: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ 19 | r'loss_mask: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ 20 | r'time: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ 21 | r'data: ({}) \(({})\) '.format(float_pattern, float_pattern) + \ 22 | r'lr: ({}) '.format(float_pattern) + \ 23 | r'max mem: (\d+)' 24 | reg_exp = re.compile(pattern) 25 | 26 | headers = ['smooth_l1_mean', 'smooth_l1_var', 'iter', 'loss', 27 | 'loss_retina_cls', 'loss_retina_reg', 'loss_mask', 28 | 'time', 'data', 'lr', 'max_mem'] 29 | 30 |
iterations = list() 31 | means = list() 32 | variations = list() 33 | running_losses = list() 34 | for args in reg_exp.findall(text): 35 | mean = [float(v) for v in args[0:4]] 36 | var = [float(v) for v in args[4:8]] 37 | iteration = int(args[8]) 38 | point_loss = float(args[9]) 39 | running_loss = float(args[10]) 40 | point_loss_retina_cls = float(args[11]) 41 | running_loss_retina_cls = float(args[12]) 42 | point_loss_retina_reg = float(args[13]) 43 | running_loss_retina_reg = float(args[14]) 44 | point_loss_mask = float(args[15]) 45 | running_loss_mask = float(args[16]) 46 | point_time = float(args[17]) 47 | running_time = float(args[18]) 48 | point_data = float(args[19]) 49 | running_data = float(args[20]) 50 | lr = float(args[21]) 51 | max_mem = int(args[22]) 52 | 53 | iterations.append(iteration) 54 | means.append(mean) 55 | variations.append(var) 56 | running_losses.append(running_loss) 57 | 58 | iterations = np.asarray(iterations) 59 | means = np.asarray(means) 60 | variations = np.asarray(variations) 61 | running_losses = np.asarray(running_losses) 62 | print(iterations) 63 | print(means) 64 | 65 | 66 | if __name__ == "__main__": 67 | parser = argparse.ArgumentParser(description='Parse log file') 68 | parser.add_argument('log_path', metavar='P', help='path to the log file') 69 | args = parser.parse_args() 70 | 71 | parse(args.log_path) 72 | 73 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Set up custom environment before nearly anything else is imported 3 | # NOTE: this should be the first import (do not reorder) 4 | from maskrcnn_benchmark.utils.env import setup_environment  # noqa F401 isort:skip 5 | 6 | import argparse 7 | import os 8 | 9 | import torch 10 | from maskrcnn_benchmark.config import cfg 11 | from maskrcnn_benchmark.data import make_data_loader 12 | from maskrcnn_benchmark.engine.inference import inference 13 | from maskrcnn_benchmark.modeling.detector import build_detection_model 14 | from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer 15 | from maskrcnn_benchmark.utils.collect_env import collect_env_info 16 | from maskrcnn_benchmark.utils.comm import synchronize, get_rank 17 | from maskrcnn_benchmark.utils.logger import setup_logger 18 | from maskrcnn_benchmark.utils.miscellaneous import mkdir 19 | 20 | 21 | def main(): 22 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference") 23 | parser.add_argument( 24 | "--config-file", 25 | default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml", 26 | metavar="FILE", 27 | help="path to config file", 28 | ) 29 | parser.add_argument("--local_rank", type=int, default=0) 30 | parser.add_argument( 31 | "opts", 32 | help="Modify config options using the command-line", 33 | default=None, 34 | nargs=argparse.REMAINDER, 35 | ) 36 | 37 | args = parser.parse_args() 38 | 39 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 40 | distributed = num_gpus > 1 41 | 42 | if distributed: 43 | torch.cuda.set_device(args.local_rank) 44 | torch.distributed.deprecated.init_process_group( 45 | backend="nccl", init_method="env://" 46 | ) 47 | 48 | cfg.merge_from_file(args.config_file) 49 | cfg.merge_from_list(args.opts) 50 | cfg.freeze() 51 | 52 | save_dir = "" 53 | logger = setup_logger("maskrcnn_benchmark",
save_dir, get_rank()) 54 | logger.info("Using {} GPUs".format(num_gpus)) 55 | logger.info(cfg) 56 | 57 | logger.info("Collecting env info (might take some time)") 58 | logger.info("\n" + collect_env_info()) 59 | 60 | model = build_detection_model(cfg) 61 | model.to(cfg.MODEL.DEVICE) 62 | 63 | checkpointer = DetectronCheckpointer(cfg, model) 64 | _ = checkpointer.load(cfg.MODEL.WEIGHT) 65 | 66 | iou_types = ("bbox",) 67 | if cfg.MODEL.MASK_ON: 68 | iou_types = iou_types + ("segm",) 69 | output_folders = [None] * len(cfg.DATASETS.TEST) 70 | if cfg.OUTPUT_DIR: 71 | dataset_names = cfg.DATASETS.TEST 72 | for idx, dataset_name in enumerate(dataset_names): 73 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) 74 | mkdir(output_folder) 75 | output_folders[idx] = output_folder 76 | data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed) 77 | for output_folder, data_loader_val in zip(output_folders, data_loaders_val): 78 | inference( 79 | model, 80 | data_loader_val, 81 | iou_types=iou_types, 82 | #box_only=cfg.MODEL.RPN_ONLY, 83 | box_only=False if cfg.RETINANET.RETINANET_ON else cfg.MODEL.RPN_ONLY, 84 | device=cfg.MODEL.DEVICE, 85 | expected_results=cfg.TEST.EXPECTED_RESULTS, 86 | expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, 87 | output_folder=output_folder, 88 | ) 89 | synchronize() 90 | 91 | 92 | if __name__ == "__main__": 93 | main() 94 | --------------------------------------------------------------------------------
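For reference, tools/test_net.py above forwards the trailing command-line tokens (`opts`) to cfg.merge_from_list; this is how the run_test_*.sh scripts override MODEL.WEIGHT, OUTPUT_DIR and TEST.IMS_PER_BATCH without editing any YAML. A minimal sketch of that mechanism, assuming the yacs library this config system is built on and a made-up two-field config:

from yacs.config import CfgNode as CN

cfg = CN()
cfg.MODEL = CN()
cfg.MODEL.WEIGHT = ""
cfg.TEST = CN()
cfg.TEST.IMS_PER_BATCH = 8

# key/value pairs exactly as they appear after the script name on the CLI
cfg.merge_from_list(["MODEL.WEIGHT", "./models/model_0089999.pth",
                     "TEST.IMS_PER_BATCH", "4"])
cfg.freeze()                   # later mutation attempts now raise an error
print(cfg.TEST.IMS_PER_BATCH)  # -> 4 (yacs coerces the string to the field's type)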