├── ABSTRACTIONS.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── INSTALL.md ├── LICENSE ├── MODEL_ZOO.md ├── README.md ├── TROUBLESHOOTING.md ├── configs ├── caffe2 │ ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml │ ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml │ ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml │ ├── e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml │ ├── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml │ └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml ├── cityscapes │ ├── README.md │ ├── e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml ├── dcn │ ├── README.md │ ├── e2e_faster_rcnn_dconv_R_50_FPN_1x.yaml │ ├── e2e_faster_rcnn_mdconv_R_50_FPN_1x.yaml │ ├── e2e_mask_rcnn_dconv_R_50_FPN_1x.yaml │ └── e2e_mask_rcnn_mdconv_R_50_FPN_1x.yaml ├── dw │ ├── e2e_mask_rcnn_R_101_FPN_1x.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x.yaml │ ├── e2e_mask_rcnn_R_50_FPN_2x.yaml │ └── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── e2e_faster_rcnn_R_101_FPN_1x.yaml ├── e2e_faster_rcnn_R_50_C4_1x.yaml ├── e2e_faster_rcnn_R_50_FPN_1x.yaml ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml ├── e2e_faster_rcnn_fbnet.yaml ├── e2e_faster_rcnn_fbnet_600.yaml ├── e2e_faster_rcnn_fbnet_chamv1a_600.yaml ├── e2e_keypoint_rcnn_R_50_FPN_1x.yaml ├── e2e_mask_rcnn_R_101_FPN_1x.yaml ├── e2e_mask_rcnn_R_50_C4_1x.yaml ├── e2e_mask_rcnn_R_50_FPN_1x.yaml ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── e2e_mask_rcnn_fbnet.yaml ├── e2e_mask_rcnn_fbnet_600.yaml ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml ├── gn_baselines │ ├── README.md │ ├── e2e_faster_rcnn_R_50_FPN_1x_gn.yaml │ ├── e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x_gn.yaml │ ├── e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ ├── scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml │ ├── scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml │ ├── scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml │ └── scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml ├── pascal_voc │ ├── e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml │ ├── e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml ├── quick_schedules │ ├── e2e_faster_rcnn_R_50_C4_quick.yaml │ ├── e2e_faster_rcnn_R_50_FPN_quick.yaml │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml │ ├── e2e_keypoint_rcnn_R_50_FPN_quick.yaml │ ├── e2e_mask_rcnn_R_50_C4_quick.yaml │ ├── e2e_mask_rcnn_R_50_FPN_quick.yaml │ ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml │ ├── rpn_R_50_C4_quick.yaml │ └── rpn_R_50_FPN_quick.yaml ├── retinanet │ ├── retinanet_R-101-FPN_1x.yaml │ ├── retinanet_R-101-FPN_P5_1x.yaml │ ├── retinanet_R-50-FPN_1x.yaml │ ├── retinanet_R-50-FPN_1x_quick.yaml │ ├── retinanet_R-50-FPN_P5_1x.yaml │ └── retinanet_X_101_32x8d_FPN_1x.yaml ├── rpn_R_101_FPN_1x.yaml ├── rpn_R_50_C4_1x.yaml ├── rpn_R_50_FPN_1x.yaml └── rpn_X_101_32x8d_FPN_1x.yaml ├── demo ├── Mask_R-CNN_demo.ipynb ├── README.md ├── demo_e2e_mask_rcnn_R_50_FPN_1x.png ├── demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png ├── predictor.py ├── test_visualise.py ├── visualise.py └── webcam.py ├── docker ├── Dockerfile └── docker-jupyter │ ├── Dockerfile │ └── jupyter_notebook_config.py ├── maskrcnn_benchmark ├── __init__.py ├── config │ ├── __init__.py │ ├── defaults.py │ └── paths_catalog.py ├── csrc │ ├── ROIAlign.h │ ├── ROIPool.h │ ├── SigmoidFocalLoss.h │ ├── cpu │ │ ├── ROIAlign_cpu.cpp │ │ ├── nms_cpu.cpp │ │ └── vision.h │ ├── cuda │ │ ├── ROIAlign_cuda.cu │ │ ├── ROIPool_cuda.cu │ │ ├── SigmoidFocalLoss_cuda.cu │ │ ├── deform_conv_cuda.cu │ │ ├── deform_conv_kernel_cuda.cu │ │ ├── deform_pool_cuda.cu │ │ ├── deform_pool_kernel_cuda.cu │ │ ├── nms.cu │ │ └── vision.h │ ├── deform_conv.h │ ├── deform_pool.h │ ├── nms.h │ └── vision.cpp ├── data │ ├── README.md │ ├── __init__.py │ ├── build.py │ ├── collate_batch.py │ ├── datasets │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── concat_dataset.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── coco │ │ │ │ ├── __init__.py │ │ │ │ └── coco_eval.py │ │ │ └── voc │ │ │ │ ├── __init__.py │ │ │ │ └── voc_eval.py │ │ ├── list_dataset.py │ │ └── voc.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── grouped_batch_sampler.py │ │ └── iteration_based_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── build.py │ │ └── transforms.py ├── engine │ ├── __init__.py │ ├── inference.py │ └── trainer.py ├── layers │ ├── __init__.py │ ├── _utils.py │ ├── batch_norm.py │ ├── dcn │ │ ├── __init__.py │ │ ├── deform_conv_func.py │ │ ├── deform_conv_module.py │ │ ├── deform_pool_func.py │ │ ├── deform_pool_module.py │ │ ├── deform_unfold_func.py │ │ └── deform_unfold_module.py │ ├── dualgraph.py │ ├── dynamic_weight.py │ ├── misc.py │ ├── nms.py │ ├── roi_align.py │ ├── roi_pool.py │ ├── sigmoid_focal_loss.py │ └── smooth_l1_loss.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── fbnet.py │ │ ├── fbnet_builder.py │ │ ├── fbnet_modeldef.py │ │ ├── fpn.py │ │ └── resnet.py │ ├── balanced_positive_negative_sampler.py │ ├── box_coder.py │ ├── detector │ │ ├── __init__.py │ │ ├── detectors.py │ │ └── generalized_rcnn.py │ ├── make_layers.py │ ├── matcher.py │ ├── poolers.py │ ├── registry.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── box_head │ │ │ ├── __init__.py │ │ │ ├── box_head.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── roi_box_feature_extractors.py │ │ │ └── roi_box_predictors.py │ │ ├── keypoint_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── keypoint_head.py │ │ │ ├── loss.py │ │ │ ├── roi_keypoint_feature_extractors.py │ │ │ └── roi_keypoint_predictors.py │ │ ├── mask_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── mask_head.py │ │ │ ├── roi_mask_feature_extractors.py │ │ │ └── roi_mask_predictors.py │ │ └── roi_heads.py │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── inference.py │ │ ├── loss.py │ │ ├── retinanet │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ └── retinanet.py │ │ ├── rpn.py │ │ └── utils.py │ └── utils.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── bounding_box.py │ ├── boxlist_ops.py │ ├── image_list.py │ ├── keypoint.py │ └── segmentation_mask.py └── utils │ ├── README.md │ ├── __init__.py │ ├── c2_model_loading.py │ ├── checkpoint.py │ ├── collect_env.py │ ├── comm.py │ ├── compute_flop.py │ ├── cv2_util.py │ ├── env.py │ ├── imports.py │ ├── logger.py │ ├── metric_logger.py │ ├── miscellaneous.py │ ├── model_serialization.py │ ├── model_zoo.py │ ├── registry.py │ └── timer.py ├── requirements.txt ├── run.sh ├── setup.py ├── test.sh ├── tests ├── checkpoint.py ├── env_tests │ └── env.py ├── test_backbones.py ├── test_box_coder.py ├── test_configs.py ├── test_data_samplers.py ├── test_detectors.py ├── test_fbnet.py ├── test_feature_extractors.py ├── test_metric_logger.py ├── test_nms.py ├── test_predictors.py ├── test_rpn_heads.py ├── test_segmentation_mask.py └── utils.py ├── tools ├── cityscapes │ ├── convert_cityscapes_to_coco.py │ └── instances2dict_with_polygons.py ├── test_net.py └── train_net.py └── visualise └── offset.m /ABSTRACTIONS.md: -------------------------------------------------------------------------------- 1 | ## Abstractions 2 | The main abstractions introduced by `maskrcnn_benchmark` that are useful to 3 | have in mind are the following: 4 | 5 | ### ImageList 6 | In PyTorch, the first dimension of the input to the network generally represents 7 | the batch dimension, and thus all elements of the same batch have the same 8 | height / width. 9 | In order to support images with different sizes and aspect ratios in the same 10 | batch, we created the `ImageList` class, which holds internally a batch of 11 | images (os possibly different sizes). The images are padded with zeros such that 12 | they have the same final size and batched over the first dimension. The original 13 | sizes of the images before padding are stored in the `image_sizes` attribute, 14 | and the batched tensor in `tensors`. 15 | We provide a convenience function `to_image_list` that accepts a few different 16 | input types, including a list of tensors, and returns an `ImageList` object. 17 | 18 | ```python 19 | from maskrcnn_benchmark.structures.image_list import to_image_list 20 | 21 | images = [torch.rand(3, 100, 200), torch.rand(3, 150, 170)] 22 | batched_images = to_image_list(images) 23 | 24 | # it is also possible to make the final batched image be a multiple of a number 25 | batched_images_32 = to_image_list(images, size_divisible=32) 26 | ``` 27 | 28 | ### BoxList 29 | The `BoxList` class holds a set of bounding boxes (represented as a `Nx4` tensor) for 30 | a specific image, as well as the size of the image as a `(width, height)` tuple. 31 | It also contains a set of methods that allow to perform geometric 32 | transformations to the bounding boxes (such as cropping, scaling and flipping). 33 | The class accepts bounding boxes from two different input formats: 34 | - `xyxy`, where each box is encoded as a `x1`, `y1`, `x2` and `y2` coordinates, and 35 | - `xywh`, where each box is encoded as `x1`, `y1`, `w` and `h`. 36 | 37 | Additionally, each `BoxList` instance can also hold arbitrary additional information 38 | for each bounding box, such as labels, visibility, probability scores etc. 39 | 40 | Here is an example on how to create a `BoxList` from a list of coordinates: 41 | ```python 42 | from maskrcnn_benchmark.structures.bounding_box import BoxList, FLIP_LEFT_RIGHT 43 | 44 | width = 100 45 | height = 200 46 | boxes = [ 47 | [0, 10, 50, 50], 48 | [50, 20, 90, 60], 49 | [10, 10, 50, 50] 50 | ] 51 | # create a BoxList with 3 boxes 52 | bbox = BoxList(boxes, image_size=(width, height), mode='xyxy') 53 | 54 | # perform some box transformations, has similar API as PIL.Image 55 | bbox_scaled = bbox.resize((width * 2, height * 3)) 56 | bbox_flipped = bbox.transpose(FLIP_LEFT_RIGHT) 57 | 58 | # add labels for each bbox 59 | labels = torch.tensor([0, 10, 1]) 60 | bbox.add_field('labels', labels) 61 | 62 | # bbox also support a few operations, like indexing 63 | # here, selects boxes 0 and 2 64 | bbox_subset = bbox[[0, 2]] 65 | ``` 66 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Mask-RCNN Benchmark 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Our Development Process 6 | Minor changes and improvements will be released on an ongoing basis. Larger changes (e.g., changesets implementing a new paper) will be released on a more periodic basis. 7 | 8 | ## Pull Requests 9 | We actively welcome your pull requests. 10 | 11 | 1. Fork the repo and create your branch from `master`. 12 | 2. If you've added code that should be tested, add tests. 13 | 3. If you've changed APIs, update the documentation. 14 | 4. Ensure the test suite passes. 15 | 5. Make sure your code lints. 16 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 17 | 18 | ## Contributor License Agreement ("CLA") 19 | In order to accept your pull request, we need you to submit a CLA. You only need 20 | to do this once to work on any of Facebook's open source projects. 21 | 22 | Complete your CLA here: 23 | 24 | ## Issues 25 | We use GitHub issues to track public bugs. Please ensure your description is 26 | clear and has sufficient instructions to be able to reproduce the issue. 27 | 28 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 29 | disclosure of security bugs. In those cases, please go through the process 30 | outlined on that page and do not file a public issue. 31 | 32 | ## Coding Style 33 | * 4 spaces for indentation rather than tabs 34 | * 80 character line length 35 | * PEP8 formatting following [Black](https://black.readthedocs.io/en/stable/) 36 | 37 | ## License 38 | By contributing to Mask-RCNN Benchmark, you agree that your contributions will be licensed 39 | under the LICENSE file in the root directory of this source tree. 40 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements: 4 | - PyTorch 1.0 from a nightly release. It **will not** work with 1.0 nor 1.0.1. Installation instructions can be found in https://pytorch.org/get-started/locally/ 5 | - torchvision from master 6 | - cocoapi 7 | - yacs 8 | - matplotlib 9 | - GCC >= 4.9 10 | - OpenCV 11 | 12 | 13 | ### Option 1: Step-by-step installation 14 | 15 | ```bash 16 | # first, make sure that your conda is setup properly with the right environment 17 | # for that, check that `which conda`, `which pip` and `which python` points to the 18 | # right path. From a clean conda env, this is what you need to do 19 | 20 | conda create --name maskrcnn_benchmark 21 | conda activate maskrcnn_benchmark 22 | 23 | # this installs the right pip and dependencies for the fresh python 24 | conda install ipython 25 | 26 | # maskrcnn_benchmark and coco api dependencies 27 | pip install ninja yacs cython matplotlib tqdm opencv-python 28 | 29 | # follow PyTorch installation in https://pytorch.org/get-started/locally/ 30 | # we give the instructions for CUDA 9.0 31 | conda install -c pytorch pytorch-nightly torchvision cudatoolkit=9.0 32 | 33 | export INSTALL_DIR=$PWD 34 | 35 | # install pycocotools 36 | cd $INSTALL_DIR 37 | git clone https://github.com/cocodataset/cocoapi.git 38 | cd cocoapi/PythonAPI 39 | python setup.py build_ext install 40 | 41 | # install apex 42 | cd $INSTALL_DIR 43 | git clone https://github.com/NVIDIA/apex.git 44 | cd apex 45 | python setup.py install --cuda_ext --cpp_ext 46 | 47 | # install PyTorch Detection 48 | cd $INSTALL_DIR 49 | git clone https://github.com/facebookresearch/maskrcnn-benchmark.git 50 | cd maskrcnn-benchmark 51 | 52 | # the following will install the lib with 53 | # symbolic links, so that you can modify 54 | # the files if you want and won't need to 55 | # re-build it 56 | python setup.py build develop 57 | 58 | 59 | unset INSTALL_DIR 60 | 61 | # or if you are on macOS 62 | # MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop 63 | ``` 64 | 65 | ### Option 2: Docker Image (Requires CUDA, Linux only) 66 | 67 | Build image with defaults (`CUDA=9.0`, `CUDNN=7`, `FORCE_CUDA=1`): 68 | 69 | nvidia-docker build -t maskrcnn-benchmark docker/ 70 | 71 | Build image with other CUDA and CUDNN versions: 72 | 73 | nvidia-docker build -t maskrcnn-benchmark --build-arg CUDA=9.2 --build-arg CUDNN=7 docker/ 74 | 75 | Build image with FORCE_CUDA disabled: 76 | 77 | nvidia-docker build -t maskrcnn-benchmark --build-arg FORCE_CUDA=0 docker/ 78 | 79 | Build and run image with built-in jupyter notebook(note that the password is used to log in jupyter notebook): 80 | 81 | nvidia-docker build -t maskrcnn-benchmark-jupyter docker/docker-jupyter/ 82 | nvidia-docker run -td -p 8888:8888 -e PASSWORD= -v : maskrcnn-benchmark-jupyter 83 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Facebook 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x" 4 | DATASETS: 5 | TEST: ("coco_2014_minival",) 6 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | TEST: ("coco_2014_minival",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/37697547/e2e_keypoint_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 2 24 | ROI_KEYPOINT_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor" 27 | PREDICTOR: "KeypointRCNNPredictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 56 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | KEYPOINT_ON: True 33 | DATASETS: 34 | TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",) 35 | TEST: ("keypoints_coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.02 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (60000, 80000) 44 | MAX_ITER: 90000 45 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x" 4 | ROI_MASK_HEAD: 5 | PREDICTOR: "MaskRCNNC4Predictor" 6 | SHARE_BOX_FEATURE_EXTRACTOR: True 7 | MASK_ON: True 8 | DATASETS: 9 | TEST: ("coco_2014_minival",) 10 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/37129812/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x" 4 | BACKBONE: 5 | CONV_BODY: "R-152-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | TEST: ("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | TEST: ("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /configs/cityscapes/e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 9 24 | DATASETS: 25 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 26 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",) 27 | DATALOADER: 28 | SIZE_DIVISIBILITY: 32 29 | SOLVER: 30 | BASE_LR: 0.01 31 | WEIGHT_DECAY: 0.0001 32 | STEPS: (18000,) 33 | MAX_ITER: 24000 34 | -------------------------------------------------------------------------------- /configs/cityscapes/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 9 24 | ROI_MASK_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 27 | PREDICTOR: "MaskRCNNC4Predictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 28 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | MASK_ON: True 33 | DATASETS: 34 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 35 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",) 36 | DATALOADER: 37 | SIZE_DIVISIBILITY: 32 38 | SOLVER: 39 | BASE_LR: 0.01 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (18000,) 42 | MAX_ITER: 24000 43 | -------------------------------------------------------------------------------- /configs/dcn/README.md: -------------------------------------------------------------------------------- 1 | ### Reference 2 | 1 [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/pdf/1811.11168.pdf) 3 | 2 third-party: [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn) 4 | 5 | ### Performance 6 | | case | bbox AP | mask AP | 7 | |----------------------------:|--------:|:-------:| 8 | | R-50-FPN-dcn (implement) | 39.8 | - | 9 | | R-50-FPN-dcn (mmdetection) | 40.0 | - | 10 | | R-50-FPN-mdcn (implement) | 40.0 | - | 11 | | R-50-FPN-mdcn (mmdetection) | 40.3 | - | 12 | | R-50-FPN-dcn (implement) | 40.8 | 36.8 | 13 | | R-50-FPN-dcn (mmdetection) | 41.1 | 37.2 | 14 | | R-50-FPN-dcn (implement) | 40.7 | 36.7 | 15 | | R-50-FPN-dcn (mmdetection) | 41.4 | 37.4 | 16 | 17 | 18 | ### Note 19 | see [dcn-v2](https://github.com/open-mmlab/mmdetection/blob/master/MODEL_ZOO.md#deformable-convolution-v2) in `mmdetection` for more details. 20 | 21 | 22 | ### Usage 23 | add these three lines 24 | ``` 25 | MODEL: 26 | RESNETS: 27 | # corresponding to C2,C3,C4,C5 28 | STAGE_WITH_DCN: (False, True, True, True) 29 | WITH_MODULATED_DCN: True 30 | DEFORMABLE_GROUPS: 1 31 | ``` -------------------------------------------------------------------------------- /configs/dcn/e2e_faster_rcnn_dconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: False 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 8 gpus 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | TEST: 44 | IMS_PER_BATCH: 8 45 | -------------------------------------------------------------------------------- /configs/dcn/e2e_faster_rcnn_mdconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: True 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 8 gpus 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | TEST: 44 | IMS_PER_BATCH: 8 45 | -------------------------------------------------------------------------------- /configs/dcn/e2e_mask_rcnn_dconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: False 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | ROI_MASK_HEAD: 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 34 | PREDICTOR: "MaskRCNNC4Predictor" 35 | POOLER_RESOLUTION: 14 36 | POOLER_SAMPLING_RATIO: 2 37 | RESOLUTION: 28 38 | SHARE_BOX_FEATURE_EXTRACTOR: False 39 | MASK_ON: True 40 | DATASETS: 41 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 42 | TEST: ("coco_2014_minival",) 43 | DATALOADER: 44 | SIZE_DIVISIBILITY: 32 45 | SOLVER: 46 | # Assume 8 gpus 47 | BASE_LR: 0.02 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (60000, 80000) 50 | MAX_ITER: 90000 51 | IMS_PER_BATCH: 16 52 | TEST: 53 | IMS_PER_BATCH: 8 54 | 55 | -------------------------------------------------------------------------------- /configs/dcn/e2e_mask_rcnn_mdconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: True 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | ROI_MASK_HEAD: 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 34 | PREDICTOR: "MaskRCNNC4Predictor" 35 | POOLER_RESOLUTION: 14 36 | POOLER_SAMPLING_RATIO: 2 37 | RESOLUTION: 28 38 | SHARE_BOX_FEATURE_EXTRACTOR: False 39 | MASK_ON: True 40 | DATASETS: 41 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 42 | TEST: ("coco_2014_minival",) 43 | DATALOADER: 44 | SIZE_DIVISIBILITY: 32 45 | SOLVER: 46 | # Assume 8 gpus 47 | BASE_LR: 0.02 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (60000, 80000) 50 | MAX_ITER: 90000 51 | IMS_PER_BATCH: 16 52 | TEST: 53 | IMS_PER_BATCH: 8 54 | -------------------------------------------------------------------------------- /configs/dw/e2e_mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STAGE_WITH_DW: (False, True, True, True) 9 | DYNAMIC_WEIGHT: 10 | GROUP: 4 11 | KERNEL: 3 12 | DILATION: ((1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12)) 13 | SHUFFLE: False 14 | DEFORM: "deformatt" 15 | INSERT_POS: "after3x3" 16 | DOMAIN: "block" 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | ROI_BOX_HEAD: 27 | POOLER_RESOLUTION: 7 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | POOLER_SAMPLING_RATIO: 2 30 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 31 | PREDICTOR: "FPNPredictor" 32 | ROI_MASK_HEAD: 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 35 | PREDICTOR: "MaskRCNNC4Predictor" 36 | POOLER_RESOLUTION: 14 37 | POOLER_SAMPLING_RATIO: 2 38 | RESOLUTION: 28 39 | SHARE_BOX_FEATURE_EXTRACTOR: False 40 | MASK_ON: True 41 | DATASETS: 42 | TRAIN: ("coco_2017_train",) 43 | TEST: ("coco_2017_val",) 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | BASE_LR: 0.02 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (60000, 80000) 50 | MAX_ITER: 90000 51 | OUTPUT_DIR: "/scratch_new/engs1870/maskrcnn/e2e_mask_rcnn_R_101_FPN_111da_33_re_1x" 52 | -------------------------------------------------------------------------------- /configs/dw/e2e_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STAGE_WITH_DW: (False, False, False, True) 9 | DYNAMIC_WEIGHT: 10 | GROUP: 4 11 | KERNEL: 3 12 | DILATION: ((1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12)) 13 | SHUFFLE: False 14 | DEFORM: "deformatt" 15 | INSERT_POS: "after3x3" 16 | DOMAIN: "block" 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | ROI_BOX_HEAD: 27 | POOLER_RESOLUTION: 7 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | POOLER_SAMPLING_RATIO: 2 30 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 31 | PREDICTOR: "FPNPredictor" 32 | ROI_MASK_HEAD: 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 35 | PREDICTOR: "MaskRCNNC4Predictor" 36 | POOLER_RESOLUTION: 14 37 | POOLER_SAMPLING_RATIO: 2 38 | RESOLUTION: 28 39 | SHARE_BOX_FEATURE_EXTRACTOR: False 40 | MASK_ON: True 41 | DATASETS: 42 | TRAIN: ("coco_2017_train",) 43 | TEST: ("coco_2017_val",) 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | BASE_LR: 0.02 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (60000, 80000) 50 | MAX_ITER: 90000 51 | #OUTPUT_DIR: "/scratch_new/engs1870/maskrcnn/e2e_mask_rcnn_R_50_FPN_001_deDGMN_1x" 52 | -------------------------------------------------------------------------------- /configs/dw/e2e_mask_rcnn_R_50_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STAGE_WITH_DW: (False, True, True, True) 9 | DYNAMIC_WEIGHT: 10 | GROUP: 4 11 | KERNEL: 3 12 | DILATION: ((1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12)) 13 | SHUFFLE: False 14 | DEFORM: "deformatt" 15 | INSERT_POS: "after3x3" 16 | DOMAIN: "block" 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 20 | PRE_NMS_TOP_N_TRAIN: 2000 21 | PRE_NMS_TOP_N_TEST: 1000 22 | POST_NMS_TOP_N_TEST: 1000 23 | FPN_POST_NMS_TOP_N_TEST: 1000 24 | ROI_HEADS: 25 | USE_FPN: True 26 | ROI_BOX_HEAD: 27 | POOLER_RESOLUTION: 7 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | POOLER_SAMPLING_RATIO: 2 30 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 31 | PREDICTOR: "FPNPredictor" 32 | ROI_MASK_HEAD: 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 35 | PREDICTOR: "MaskRCNNC4Predictor" 36 | POOLER_RESOLUTION: 14 37 | POOLER_SAMPLING_RATIO: 2 38 | RESOLUTION: 28 39 | SHARE_BOX_FEATURE_EXTRACTOR: False 40 | MASK_ON: True 41 | DATASETS: 42 | TRAIN: ("coco_2017_train",) 43 | TEST: ("coco_2017_val",) 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | BASE_LR: 0.02 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (120000, 160000) 50 | MAX_ITER: 180000 51 | OUTPUT_DIR: "/scratch_new/engs1870/maskrcnn/e2e_mask_rcnn_R_50_FPN_111da_33_re_2x" -------------------------------------------------------------------------------- /configs/dw/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | STAGE_WITH_DW: (False, True, True, True) 12 | DYNAMIC_WEIGHT: 13 | GROUP: 4 14 | KERNEL: 3 15 | DILATION: ((1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12)) 16 | SHUFFLE: False 17 | DEFORM: "deformatt" 18 | INSERT_POS: "after3x3" 19 | DOMAIN: "block" 20 | RPN: 21 | USE_FPN: True 22 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 23 | PRE_NMS_TOP_N_TRAIN: 2000 24 | PRE_NMS_TOP_N_TEST: 1000 25 | POST_NMS_TOP_N_TEST: 1000 26 | FPN_POST_NMS_TOP_N_TEST: 1000 27 | ROI_HEADS: 28 | USE_FPN: True 29 | ROI_BOX_HEAD: 30 | POOLER_RESOLUTION: 7 31 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 34 | PREDICTOR: "FPNPredictor" 35 | ROI_MASK_HEAD: 36 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 37 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 38 | PREDICTOR: "MaskRCNNC4Predictor" 39 | POOLER_RESOLUTION: 14 40 | POOLER_SAMPLING_RATIO: 2 41 | RESOLUTION: 28 42 | SHARE_BOX_FEATURE_EXTRACTOR: False 43 | MASK_ON: True 44 | DATASETS: 45 | TRAIN: ("coco_2017_train",) 46 | TEST: ("coco_2017_test",) 47 | DATALOADER: 48 | SIZE_DIVISIBILITY: 32 49 | SOLVER: 50 | BASE_LR: 0.01 51 | WEIGHT_DECAY: 0.0001 52 | STEPS: (120000, 160000) 53 | MAX_ITER: 180000 54 | IMS_PER_BATCH: 8 55 | OUTPUT_DIR: "/scratch_new/engs1870/maskrcnn/e2e_mask_rcnn_X_101_32x8d_FPN_111da_33_re_1x" 56 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 25 | TEST: ("coco_2014_minival",) 26 | DATALOADER: 27 | SIZE_DIVISIBILITY: 32 28 | SOLVER: 29 | BASE_LR: 0.02 30 | WEIGHT_DECAY: 0.0001 31 | STEPS: (60000, 80000) 32 | MAX_ITER: 90000 33 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | DATASETS: 8 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 9 | TEST: ("coco_2014_minival",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WEIGHT_DECAY: 0.0001 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | IMS_PER_BATCH: 8 16 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 25 | TEST: ("coco_2014_minival",) 26 | DATALOADER: 27 | SIZE_DIVISIBILITY: 32 28 | SOLVER: 29 | BASE_LR: 0.02 30 | WEIGHT_DECAY: 0.0001 31 | STEPS: (60000, 80000) 32 | MAX_ITER: 90000 33 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RPN: 7 | USE_FPN: True 8 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 9 | PRE_NMS_TOP_N_TRAIN: 2000 10 | PRE_NMS_TOP_N_TEST: 1000 11 | POST_NMS_TOP_N_TEST: 1000 12 | FPN_POST_NMS_TOP_N_TEST: 1000 13 | ROI_HEADS: 14 | USE_FPN: True 15 | ROI_BOX_HEAD: 16 | POOLER_RESOLUTION: 7 17 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 18 | POOLER_SAMPLING_RATIO: 2 19 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 20 | PREDICTOR: "FPNPredictor" 21 | RESNETS: 22 | BACKBONE_OUT_CHANNELS: 256 23 | STRIDE_IN_1X1: False 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.01 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (120000, 160000) 35 | MAX_ITER: 180000 36 | IMS_PER_BATCH: 8 37 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_fbnet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 100 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 512 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.06 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (60000, 80000) 34 | MAX_ITER: 90000 35 | IMS_PER_BATCH: 128 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (320, ) 40 | MAX_SIZE_TRAIN: 640 41 | MIN_SIZE_TEST: 320 42 | MAX_SIZE_TEST: 640 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_fbnet_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 200 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.06 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (60000, 80000) 34 | MAX_ITER: 90000 35 | IMS_PER_BATCH: 128 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (600, ) 40 | MAX_SIZE_TRAIN: 1000 41 | MIN_SIZE_TEST: 600 42 | MAX_SIZE_TEST: 1000 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "cham_v1a" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 200 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 128 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.045 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (90000, 120000) 34 | MAX_ITER: 135000 35 | IMS_PER_BATCH: 96 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (600, ) 40 | MAX_SIZE_TRAIN: 1000 41 | MIN_SIZE_TEST: 600 42 | MAX_SIZE_TEST: 1000 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /configs/e2e_keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 2 24 | ROI_KEYPOINT_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor" 27 | PREDICTOR: "KeypointRCNNPredictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 56 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | KEYPOINT_ON: True 33 | DATASETS: 34 | TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",) 35 | TEST: ("keypoints_coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.02 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (60000, 80000) 44 | MAX_ITER: 90000 45 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_test",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_MASK_HEAD: 8 | PREDICTOR: "MaskRCNNC4Predictor" 9 | SHARE_BOX_FEATURE_EXTRACTOR: True 10 | MASK_ON: True 11 | DATASETS: 12 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 13 | TEST: ("coco_2014_minival",) 14 | SOLVER: 15 | BASE_LR: 0.01 16 | WEIGHT_DECAY: 0.0001 17 | STEPS: (120000, 160000) 18 | MAX_ITER: 180000 19 | IMS_PER_BATCH: 8 20 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | #OUTPUT_DIR: "/scratch_new/engs1870/maskrcnn/e2e_mask_rcnn_R_50_FPN_1x" 43 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | TRAIN: ("coco_2017_train",) 37 | TEST: ("coco_2017_test",) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.01 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (120000, 160000) 44 | MAX_ITER: 180000 45 | IMS_PER_BATCH: 8 46 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 100 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (320, ) 48 | MAX_SIZE_TRAIN: 640 49 | MIN_SIZE_TEST: 320 50 | MAX_SIZE_TEST: 640 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 200 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (600, ) 48 | MAX_SIZE_TRAIN: 1000 49 | MIN_SIZE_TEST: 600 50 | MAX_SIZE_TEST: 1000 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "xirb16d_dsmask" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: -1.0 12 | RPN: 13 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 100 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 512 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (320, ) 48 | MAX_SIZE_TRAIN: 640 49 | MIN_SIZE_TEST: 320 50 | MAX_SIZE_TEST: 640 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "xirb16d_dsmask" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 200 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (600, ) 48 | MAX_SIZE_TRAIN: 1000 49 | MIN_SIZE_TEST: 600 50 | MAX_SIZE_TEST: 1000 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/gn_baselines/README.md: -------------------------------------------------------------------------------- 1 | ### Group Normalization 2 | 1 [Group Normalization](https://arxiv.org/abs/1803.08494) 3 | 2 [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883) 4 | 3 [official code](https://github.com/facebookresearch/Detectron/blob/master/projects/GN/README.md) 5 | 6 | 7 | ### Performance 8 | | case | Type | lr schd | im/gpu | bbox AP | mask AP | 9 | |----------------------------|:------------:|:---------:|:-------:|:-------:|:-------:| 10 | | R-50-FPN, GN (paper) | finetune | 2x | 2 | 40.3 | 35.7 | 11 | | R-50-FPN, GN (implement) | finetune | 2x | 2 | 40.2 | 36.0 | 12 | | R-50-FPN, GN (paper) | from scratch | 3x | 2 | 39.5 | 35.2 | 13 | | R-50-FPN, GN (implement) | from scratch | 3x | 2 | 38.9 | 35.1 | 14 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | DATASETS: 37 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 38 | TEST: ("coco_2014_minival",) 39 | DATALOADER: 40 | SIZE_DIVISIBILITY: 32 41 | SOLVER: 42 | # Assume 8 gpus 43 | BASE_LR: 0.02 44 | WEIGHT_DECAY: 0.0001 45 | STEPS: (60000, 80000) 46 | MAX_ITER: 90000 47 | IMS_PER_BATCH: 16 48 | TEST: 49 | IMS_PER_BATCH: 8 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | DATASETS: 39 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 40 | TEST: ("coco_2014_minival",) 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 8 gpus 45 | BASE_LR: 0.02 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (60000, 80000) 48 | MAX_ITER: 90000 49 | IMS_PER_BATCH: 16 50 | TEST: 51 | IMS_PER_BATCH: 8 52 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | ROI_MASK_HEAD: 37 | USE_GN: True # use GN for mask head 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | CONV_LAYERS: (256, 256, 256, 256) 40 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 41 | PREDICTOR: "MaskRCNNC4Predictor" 42 | POOLER_RESOLUTION: 14 43 | POOLER_SAMPLING_RATIO: 2 44 | RESOLUTION: 28 45 | SHARE_BOX_FEATURE_EXTRACTOR: False 46 | MASK_ON: True 47 | DATASETS: 48 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 49 | TEST: ("coco_2014_minival",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | # Assume 8 gpus 54 | BASE_LR: 0.02 55 | WEIGHT_DECAY: 0.0001 56 | STEPS: (60000, 80000) 57 | MAX_ITER: 90000 58 | IMS_PER_BATCH: 16 59 | TEST: 60 | IMS_PER_BATCH: 8 61 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | ROI_MASK_HEAD: 39 | USE_GN: True # use GN for mask head 40 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 41 | CONV_LAYERS: (256, 256, 256, 256) 42 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 43 | PREDICTOR: "MaskRCNNC4Predictor" 44 | POOLER_RESOLUTION: 14 45 | POOLER_SAMPLING_RATIO: 2 46 | RESOLUTION: 28 47 | SHARE_BOX_FEATURE_EXTRACTOR: False 48 | MASK_ON: True 49 | DATASETS: 50 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 51 | TEST: ("coco_2014_minival",) 52 | DATALOADER: 53 | SIZE_DIVISIBILITY: 32 54 | SOLVER: 55 | # Assume 8 gpus 56 | BASE_LR: 0.02 57 | WEIGHT_DECAY: 0.0001 58 | STEPS: (60000, 80000) 59 | MAX_ITER: 90000 60 | IMS_PER_BATCH: 16 61 | TEST: 62 | IMS_PER_BATCH: 8 63 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 12 | RESNETS: # use GN for backbone 13 | BACKBONE_OUT_CHANNELS: 256 14 | STRIDE_IN_1X1: False 15 | TRANS_FUNC: "BottleneckWithGN" 16 | STEM_FUNC: "StemWithGN" 17 | FPN: 18 | USE_GN: True # use GN for FPN 19 | RPN: 20 | USE_FPN: True 21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 22 | PRE_NMS_TOP_N_TRAIN: 2000 23 | PRE_NMS_TOP_N_TEST: 1000 24 | POST_NMS_TOP_N_TEST: 1000 25 | FPN_POST_NMS_TOP_N_TEST: 1000 26 | ROI_HEADS: 27 | USE_FPN: True 28 | BATCH_SIZE_PER_IMAGE: 512 29 | POSITIVE_FRACTION: 0.25 30 | ROI_BOX_HEAD: 31 | USE_GN: True # use GN for bbox head 32 | POOLER_RESOLUTION: 7 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | POOLER_SAMPLING_RATIO: 2 35 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 36 | PREDICTOR: "FPNPredictor" 37 | DATASETS: 38 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 39 | TEST: ("coco_2014_minival",) 40 | DATALOADER: 41 | SIZE_DIVISIBILITY: 32 42 | SOLVER: 43 | # Assume 8 gpus 44 | BASE_LR: 0.02 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (210000, 250000) 47 | MAX_ITER: 270000 48 | IMS_PER_BATCH: 16 49 | TEST: 50 | IMS_PER_BATCH: 8 51 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 12 | RESNETS: # use GN for backbone 13 | BACKBONE_OUT_CHANNELS: 256 14 | STRIDE_IN_1X1: False 15 | TRANS_FUNC: "BottleneckWithGN" 16 | STEM_FUNC: "StemWithGN" 17 | FPN: 18 | USE_GN: True # use GN for FPN 19 | RPN: 20 | USE_FPN: True 21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 22 | PRE_NMS_TOP_N_TRAIN: 2000 23 | PRE_NMS_TOP_N_TEST: 1000 24 | POST_NMS_TOP_N_TEST: 1000 25 | FPN_POST_NMS_TOP_N_TEST: 1000 26 | ROI_HEADS: 27 | USE_FPN: True 28 | BATCH_SIZE_PER_IMAGE: 512 29 | POSITIVE_FRACTION: 0.25 30 | ROI_BOX_HEAD: 31 | USE_GN: True # use GN for bbox head 32 | POOLER_RESOLUTION: 7 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | POOLER_SAMPLING_RATIO: 2 35 | CONV_HEAD_DIM: 256 36 | NUM_STACKED_CONVS: 4 37 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 38 | PREDICTOR: "FPNPredictor" 39 | DATASETS: 40 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 41 | TEST: ("coco_2014_minival",) 42 | DATALOADER: 43 | SIZE_DIVISIBILITY: 32 44 | SOLVER: 45 | # Assume 8 gpus 46 | BASE_LR: 0.02 47 | WEIGHT_DECAY: 0.0001 48 | STEPS: (210000, 250000) 49 | MAX_ITER: 270000 50 | IMS_PER_BATCH: 16 51 | TEST: 52 | IMS_PER_BATCH: 8 53 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 12 | RESNETS: # use GN for backbone 13 | BACKBONE_OUT_CHANNELS: 256 14 | STRIDE_IN_1X1: False 15 | TRANS_FUNC: "BottleneckWithGN" 16 | STEM_FUNC: "StemWithGN" 17 | FPN: 18 | USE_GN: True # use GN for FPN 19 | RPN: 20 | USE_FPN: True 21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 22 | PRE_NMS_TOP_N_TRAIN: 2000 23 | PRE_NMS_TOP_N_TEST: 1000 24 | POST_NMS_TOP_N_TEST: 1000 25 | FPN_POST_NMS_TOP_N_TEST: 1000 26 | ROI_HEADS: 27 | USE_FPN: True 28 | BATCH_SIZE_PER_IMAGE: 512 29 | POSITIVE_FRACTION: 0.25 30 | ROI_BOX_HEAD: 31 | USE_GN: True # use GN for bbox head 32 | POOLER_RESOLUTION: 7 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | POOLER_SAMPLING_RATIO: 2 35 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 36 | PREDICTOR: "FPNPredictor" 37 | ROI_MASK_HEAD: 38 | USE_GN: True # use GN for mask head 39 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 40 | CONV_LAYERS: (256, 256, 256, 256) 41 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 42 | PREDICTOR: "MaskRCNNC4Predictor" 43 | POOLER_RESOLUTION: 14 44 | POOLER_SAMPLING_RATIO: 2 45 | RESOLUTION: 28 46 | SHARE_BOX_FEATURE_EXTRACTOR: False 47 | MASK_ON: True 48 | DATASETS: 49 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 50 | TEST: ("coco_2014_minival",) 51 | DATALOADER: 52 | SIZE_DIVISIBILITY: 32 53 | SOLVER: 54 | # Assume 8 gpus 55 | BASE_LR: 0.02 56 | WEIGHT_DECAY: 0.0001 57 | STEPS: (210000, 250000) 58 | MAX_ITER: 270000 59 | IMS_PER_BATCH: 16 60 | TEST: 61 | IMS_PER_BATCH: 8 62 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 12 | RESNETS: # use GN for backbone 13 | BACKBONE_OUT_CHANNELS: 256 14 | STRIDE_IN_1X1: False 15 | TRANS_FUNC: "BottleneckWithGN" 16 | STEM_FUNC: "StemWithGN" 17 | FPN: 18 | USE_GN: True # use GN for FPN 19 | RPN: 20 | USE_FPN: True 21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 22 | PRE_NMS_TOP_N_TRAIN: 2000 23 | PRE_NMS_TOP_N_TEST: 1000 24 | POST_NMS_TOP_N_TEST: 1000 25 | FPN_POST_NMS_TOP_N_TEST: 1000 26 | ROI_HEADS: 27 | USE_FPN: True 28 | BATCH_SIZE_PER_IMAGE: 512 29 | POSITIVE_FRACTION: 0.25 30 | ROI_BOX_HEAD: 31 | USE_GN: True # use GN for bbox head 32 | POOLER_RESOLUTION: 7 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | POOLER_SAMPLING_RATIO: 2 35 | CONV_HEAD_DIM: 256 36 | NUM_STACKED_CONVS: 4 37 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 38 | PREDICTOR: "FPNPredictor" 39 | ROI_MASK_HEAD: 40 | USE_GN: True # use GN for mask head 41 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 42 | CONV_LAYERS: (256, 256, 256, 256) 43 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 44 | PREDICTOR: "MaskRCNNC4Predictor" 45 | POOLER_RESOLUTION: 14 46 | POOLER_SAMPLING_RATIO: 2 47 | RESOLUTION: 28 48 | SHARE_BOX_FEATURE_EXTRACTOR: False 49 | MASK_ON: True 50 | DATASETS: 51 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 52 | TEST: ("coco_2014_minival",) 53 | DATALOADER: 54 | SIZE_DIVISIBILITY: 32 55 | SOLVER: 56 | # Assume 8 gpus 57 | BASE_LR: 0.02 58 | WEIGHT_DECAY: 0.0001 59 | STEPS: (210000, 250000) 60 | MAX_ITER: 270000 61 | IMS_PER_BATCH: 16 62 | TEST: 63 | IMS_PER_BATCH: 8 64 | -------------------------------------------------------------------------------- /configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 300 7 | ANCHOR_SIZES: (128, 256, 512) 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 21 10 | DATASETS: 11 | TRAIN: ("voc_2007_train", "voc_2007_val") 12 | TEST: ("voc_2007_test",) 13 | SOLVER: 14 | BASE_LR: 0.001 15 | WEIGHT_DECAY: 0.0001 16 | STEPS: (50000, ) 17 | MAX_ITER: 70000 18 | IMS_PER_BATCH: 1 19 | TEST: 20 | IMS_PER_BATCH: 1 21 | -------------------------------------------------------------------------------- /configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 300 7 | ANCHOR_SIZES: (128, 256, 512) 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 21 10 | DATASETS: 11 | TRAIN: ("voc_2007_train", "voc_2007_val") 12 | TEST: ("voc_2007_test",) 13 | SOLVER: 14 | BASE_LR: 0.004 15 | WEIGHT_DECAY: 0.0001 16 | STEPS: (12500, ) 17 | MAX_ITER: 17500 18 | IMS_PER_BATCH: 4 19 | TEST: 20 | IMS_PER_BATCH: 4 21 | -------------------------------------------------------------------------------- /configs/pascal_voc/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 21 24 | ROI_MASK_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 27 | PREDICTOR: "MaskRCNNC4Predictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 28 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | MASK_ON: True 33 | DATASETS: 34 | TRAIN: ("voc_2012_train_cocostyle",) 35 | TEST: ("voc_2012_val_cocostyle",) 36 | DATALOADER: 37 | SIZE_DIVISIBILITY: 32 38 | SOLVER: 39 | BASE_LR: 0.01 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (18000,) 42 | MAX_ITER: 24000 43 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | DATASETS: 10 | TRAIN: ("coco_2014_minival",) 11 | TEST: ("coco_2014_minival",) 12 | INPUT: 13 | MIN_SIZE_TRAIN: (600,) 14 | MAX_SIZE_TRAIN: 1000 15 | MIN_SIZE_TEST: 800 16 | MAX_SIZE_TEST: 1000 17 | SOLVER: 18 | BASE_LR: 0.005 19 | WEIGHT_DECAY: 0.0001 20 | STEPS: (1500,) 21 | MAX_ITER: 2000 22 | IMS_PER_BATCH: 2 23 | TEST: 24 | IMS_PER_BATCH: 2 25 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | BATCH_SIZE_PER_IMAGE: 256 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | DATASETS: 25 | TRAIN: ("coco_2014_minival",) 26 | TEST: ("coco_2014_minival",) 27 | INPUT: 28 | MIN_SIZE_TRAIN: (600,) 29 | MAX_SIZE_TRAIN: 1000 30 | MIN_SIZE_TEST: 800 31 | MAX_SIZE_TEST: 1000 32 | DATALOADER: 33 | SIZE_DIVISIBILITY: 32 34 | SOLVER: 35 | BASE_LR: 0.005 36 | WEIGHT_DECAY: 0.0001 37 | STEPS: (1500,) 38 | MAX_ITER: 2000 39 | IMS_PER_BATCH: 4 40 | TEST: 41 | IMS_PER_BATCH: 2 42 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | BATCH_SIZE_PER_IMAGE: 256 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | DATASETS: 28 | TRAIN: ("coco_2014_minival",) 29 | TEST: ("coco_2014_minival",) 30 | INPUT: 31 | MIN_SIZE_TRAIN: (600,) 32 | MAX_SIZE_TRAIN: 1000 33 | MIN_SIZE_TEST: 800 34 | MAX_SIZE_TEST: 1000 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.005 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (1500,) 41 | MAX_ITER: 2000 42 | IMS_PER_BATCH: 2 43 | TEST: 44 | IMS_PER_BATCH: 2 45 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_keypoint_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | BATCH_SIZE_PER_IMAGE: 256 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | NUM_CLASSES: 2 25 | ROI_KEYPOINT_HEAD: 26 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 27 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor" 28 | PREDICTOR: "KeypointRCNNPredictor" 29 | POOLER_RESOLUTION: 14 30 | POOLER_SAMPLING_RATIO: 2 31 | RESOLUTION: 56 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | KEYPOINT_ON: True 34 | DATASETS: 35 | TRAIN: ("keypoints_coco_2014_minival",) 36 | TEST: ("keypoints_coco_2014_minival",) 37 | INPUT: 38 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 39 | MAX_SIZE_TRAIN: 1000 40 | MIN_SIZE_TEST: 800 41 | MAX_SIZE_TEST: 1000 42 | DATALOADER: 43 | SIZE_DIVISIBILITY: 32 44 | SOLVER: 45 | BASE_LR: 0.005 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (1500,) 48 | MAX_ITER: 2000 49 | IMS_PER_BATCH: 4 50 | TEST: 51 | IMS_PER_BATCH: 2 52 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | ROI_MASK_HEAD: 10 | PREDICTOR: "MaskRCNNC4Predictor" 11 | SHARE_BOX_FEATURE_EXTRACTOR: True 12 | MASK_ON: True 13 | DATASETS: 14 | TRAIN: ("coco_2014_minival",) 15 | TEST: ("coco_2014_minival",) 16 | INPUT: 17 | MIN_SIZE_TRAIN: (600,) 18 | MAX_SIZE_TRAIN: 1000 19 | MIN_SIZE_TEST: 800 20 | MAX_SIZE_TEST: 1000 21 | SOLVER: 22 | BASE_LR: 0.005 23 | WEIGHT_DECAY: 0.0001 24 | STEPS: (1500,) 25 | MAX_ITER: 2000 26 | IMS_PER_BATCH: 4 27 | TEST: 28 | IMS_PER_BATCH: 2 29 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | BATCH_SIZE_PER_IMAGE: 256 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | ROI_MASK_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 27 | PREDICTOR: "MaskRCNNC4Predictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 28 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | MASK_ON: True 33 | DATASETS: 34 | TRAIN: ("coco_2014_minival",) 35 | TEST: ("coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (600,) 38 | MAX_SIZE_TRAIN: 1000 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1000 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | BASE_LR: 0.005 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (1500,) 47 | MAX_ITER: 2000 48 | IMS_PER_BATCH: 4 49 | TEST: 50 | IMS_PER_BATCH: 2 51 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | BATCH_SIZE_PER_IMAGE: 256 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | ROI_MASK_HEAD: 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2014_minival",) 38 | TEST: ("coco_2014_minival",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (600,) 41 | MAX_SIZE_TRAIN: 1000 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1000 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | BASE_LR: 0.005 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (1500,) 50 | MAX_ITER: 2000 51 | IMS_PER_BATCH: 2 52 | TEST: 53 | IMS_PER_BATCH: 2 54 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 12000 7 | POST_NMS_TOP_N_TEST: 2000 8 | DATASETS: 9 | TRAIN: ("coco_2014_minival",) 10 | TEST: ("coco_2014_minival",) 11 | INPUT: 12 | MIN_SIZE_TRAIN: (600,) 13 | MAX_SIZE_TRAIN: 1000 14 | MIN_SIZE_TEST: 800 15 | MAX_SIZE_TEST: 1000 16 | SOLVER: 17 | BASE_LR: 0.005 18 | WEIGHT_DECAY: 0.0001 19 | STEPS: (1500,) 20 | MAX_ITER: 2000 21 | IMS_PER_BATCH: 4 22 | TEST: 23 | IMS_PER_BATCH: 2 24 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 2000 14 | FPN_POST_NMS_TOP_N_TEST: 2000 15 | DATASETS: 16 | TRAIN: ("coco_2014_minival",) 17 | TEST: ("coco_2014_minival",) 18 | INPUT: 19 | MIN_SIZE_TRAIN: (600,) 20 | MAX_SIZE_TRAIN: 1000 21 | MIN_SIZE_TEST: 800 22 | MAX_SIZE_TEST: 1000 23 | DATALOADER: 24 | SIZE_DIVISIBILITY: 32 25 | SOLVER: 26 | BASE_LR: 0.005 27 | WEIGHT_DECAY: 0.0001 28 | STEPS: (1500,) 29 | MAX_ITER: 2000 30 | IMS_PER_BATCH: 4 31 | TEST: 32 | IMS_PER_BATCH: 2 33 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | FG_IOU_THRESHOLD: 0.5 32 | BG_IOU_THRESHOLD: 0.4 33 | DATASETS: 34 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 35 | TEST: ("coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (800, ) 38 | MAX_SIZE_TRAIN: 1333 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1333 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 4 gpus 45 | BASE_LR: 0.005 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (120000, 160000) 48 | MAX_ITER: 180000 49 | IMS_PER_BATCH: 8 50 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-101-FPN_P5_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | USE_C5: False 32 | FG_IOU_THRESHOLD: 0.5 33 | BG_IOU_THRESHOLD: 0.4 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | INPUT: 38 | MIN_SIZE_TRAIN: (800, ) 39 | MAX_SIZE_TRAIN: 1333 40 | MIN_SIZE_TEST: 800 41 | MAX_SIZE_TEST: 1333 42 | DATALOADER: 43 | SIZE_DIVISIBILITY: 32 44 | SOLVER: 45 | # Assume 4 gpus 46 | BASE_LR: 0.005 47 | WEIGHT_DECAY: 0.0001 48 | STEPS: (120000, 160000) 49 | MAX_ITER: 180000 50 | IMS_PER_BATCH: 8 51 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | FG_IOU_THRESHOLD: 0.5 32 | BG_IOU_THRESHOLD: 0.4 33 | DATASETS: 34 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 35 | TEST: ("coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (800,) 38 | MAX_SIZE_TRAIN: 1333 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1333 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 4 gpus 45 | BASE_LR: 0.005 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (120000, 160000) 48 | MAX_ITER: 180000 49 | IMS_PER_BATCH: 8 50 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-50-FPN_1x_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | FG_IOU_THRESHOLD: 0.5 32 | BG_IOU_THRESHOLD: 0.4 33 | DATASETS: 34 | TRAIN: ("coco_2014_minival",) 35 | TEST: ("coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (600,) 38 | MAX_SIZE_TRAIN: 1000 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1000 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | BASE_LR: 0.005 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (3500,) 47 | MAX_ITER: 4000 48 | IMS_PER_BATCH: 4 49 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-50-FPN_P5_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | USE_C5: False 32 | FG_IOU_THRESHOLD: 0.5 33 | BG_IOU_THRESHOLD: 0.4 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | INPUT: 38 | MIN_SIZE_TRAIN: (800,) 39 | MAX_SIZE_TRAIN: 1333 40 | MIN_SIZE_TEST: 800 41 | MAX_SIZE_TEST: 1333 42 | DATALOADER: 43 | SIZE_DIVISIBILITY: 32 44 | SOLVER: 45 | # Assume 4 gpus 46 | BASE_LR: 0.005 47 | WEIGHT_DECAY: 0.0001 48 | STEPS: (120000, 160000) 49 | MAX_ITER: 180000 50 | IMS_PER_BATCH: 8 51 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | STRIDE_IN_1X1: False 11 | NUM_GROUPS: 32 12 | WIDTH_PER_GROUP: 8 13 | RPN: 14 | USE_FPN: True 15 | FG_IOU_THRESHOLD: 0.5 16 | BG_IOU_THRESHOLD: 0.4 17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 18 | PRE_NMS_TOP_N_TRAIN: 2000 19 | PRE_NMS_TOP_N_TEST: 1000 20 | POST_NMS_TOP_N_TEST: 1000 21 | FPN_POST_NMS_TOP_N_TEST: 1000 22 | ROI_HEADS: 23 | USE_FPN: True 24 | BATCH_SIZE_PER_IMAGE: 256 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | RETINANET: 32 | SCALES_PER_OCTAVE: 3 33 | STRADDLE_THRESH: -1 34 | FG_IOU_THRESHOLD: 0.5 35 | BG_IOU_THRESHOLD: 0.4 36 | DATASETS: 37 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 38 | TEST: ("coco_2014_minival",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800, ) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.0025 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (240000, 320000) 51 | MAX_ITER: 360000 52 | IMS_PER_BATCH: 4 53 | -------------------------------------------------------------------------------- /configs/rpn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 2000 14 | FPN_POST_NMS_TOP_N_TEST: 2000 15 | DATASETS: 16 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 17 | TEST: ("coco_2014_minival",) 18 | DATALOADER: 19 | SIZE_DIVISIBILITY: 32 20 | SOLVER: 21 | BASE_LR: 0.02 22 | WEIGHT_DECAY: 0.0001 23 | STEPS: (60000, 80000) 24 | MAX_ITER: 90000 25 | -------------------------------------------------------------------------------- /configs/rpn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 12000 7 | POST_NMS_TOP_N_TEST: 2000 8 | DATASETS: 9 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 10 | TEST: ("coco_2014_minival",) 11 | SOLVER: 12 | BASE_LR: 0.02 13 | WEIGHT_DECAY: 0.0001 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | -------------------------------------------------------------------------------- /configs/rpn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 2000 14 | FPN_POST_NMS_TOP_N_TEST: 2000 15 | DATASETS: 16 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 17 | TEST: ("coco_2014_minival",) 18 | DATALOADER: 19 | SIZE_DIVISIBILITY: 32 20 | SOLVER: 21 | BASE_LR: 0.02 22 | WEIGHT_DECAY: 0.0001 23 | STEPS: (60000, 80000) 24 | MAX_ITER: 90000 25 | -------------------------------------------------------------------------------- /configs/rpn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | STRIDE_IN_1X1: False 10 | NUM_GROUPS: 32 11 | WIDTH_PER_GROUP: 8 12 | RPN: 13 | USE_FPN: True 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 2000 17 | FPN_POST_NMS_TOP_N_TEST: 2000 18 | DATASETS: 19 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 20 | TEST: ("coco_2014_minival",) 21 | DATALOADER: 22 | SIZE_DIVISIBILITY: 32 23 | SOLVER: 24 | BASE_LR: 0.02 25 | WEIGHT_DECAY: 0.0001 26 | STEPS: (60000, 80000) 27 | MAX_ITER: 90000 28 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | ## Webcam and Jupyter notebook demo 2 | 3 | This folder contains a simple webcam demo that illustrates how you can use `maskrcnn_benchmark` for inference. 4 | 5 | 6 | ### With your preferred environment 7 | 8 | You can start it by running it from this folder, using one of the following commands: 9 | ```bash 10 | # by default, it runs on the GPU 11 | # for best results, use min-image-size 800 12 | python webcam.py --min-image-size 800 13 | # can also run it on the CPU 14 | python webcam.py --min-image-size 300 MODEL.DEVICE cpu 15 | # or change the model that you want to use 16 | python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu 17 | # in order to see the probability heatmaps, pass --show-mask-heatmaps 18 | python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu 19 | ``` 20 | 21 | ### With Docker 22 | 23 | Build the image with the tag `maskrcnn-benchmark` (check [INSTALL.md](../INSTALL.md) for instructions) 24 | 25 | Adjust permissions of the X server host (be careful with this step, refer to 26 | [here](http://wiki.ros.org/docker/Tutorials/GUI) for alternatives) 27 | 28 | ```bash 29 | xhost + 30 | ``` 31 | 32 | Then run a container with the demo: 33 | 34 | ``` 35 | docker run --rm -it \ 36 | -e DISPLAY=${DISPLAY} \ 37 | --privileged \ 38 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 39 | --device=/dev/video0:/dev/video0 \ 40 | --ipc=host maskrcnn-benchmark \ 41 | python demo/webcam.py --min-image-size 300 \ 42 | --config-file configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml 43 | ``` 44 | 45 | **DISCLAIMER:** *This was tested for an Ubuntu 16.04 machine, 46 | the volume mapping may vary depending on your platform* 47 | -------------------------------------------------------------------------------- /demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png -------------------------------------------------------------------------------- /demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png -------------------------------------------------------------------------------- /demo/test_visualise.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib.pylab as pylab 3 | import os 4 | import cv2 5 | 6 | from PIL import Image 7 | import numpy as np 8 | 9 | # this makes our figures bigger 10 | pylab.rcParams['figure.figsize'] = 20, 12 11 | 12 | from maskrcnn_benchmark.config import cfg 13 | from predictor import COCODemo 14 | 15 | config_file = "configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml" 16 | 17 | # update the config options with the config file 18 | cfg.merge_from_file(config_file) 19 | cfg.MODEL.WEIGHT = 'models/e2e_mask_rcnn_X_101_32x8d_FPN_1x/e2e_mask_rcnn_X_101_32x8d_FPN_1x.pth' 20 | # manual override some options 21 | # cfg.merge_from_list(["MODEL.DEVICE", "cpu"]) 22 | 23 | coco_demo = COCODemo( 24 | cfg, 25 | min_image_size=800, 26 | confidence_threshold=0.7, 27 | ) 28 | 29 | # val_path = 'datasets/coco/val2017/' # this is the validation image data 30 | val_path = '/home/lz/Downloads/sample/' 31 | # output_path = 'models/e2e_mask_rcnn_R_50_FPN_011d_33_g4_14812_sum_re_1x/output/' 32 | output_path = '/home/lz/Downloads/output/' 33 | imglistval = os.listdir(val_path) 34 | for name in imglistval: 35 | print(name) 36 | imgfile = val_path + name 37 | pil_image = Image.open(imgfile).convert("RGB") 38 | image = np.array(pil_image)[:, :, [2, 1, 0]] 39 | 40 | predictions = coco_demo.run_on_opencv_image(image) # forward predict 41 | 42 | savefile = output_path + name 43 | 44 | cv2.imwrite(savefile, predictions) 45 | -------------------------------------------------------------------------------- /demo/webcam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import argparse 3 | import cv2 4 | 5 | from maskrcnn_benchmark.config import cfg 6 | from predictor import COCODemo 7 | 8 | import time 9 | 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo") 13 | parser.add_argument( 14 | "--config-file", 15 | default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml", 16 | metavar="FILE", 17 | help="path to config file", 18 | ) 19 | parser.add_argument( 20 | "--confidence-threshold", 21 | type=float, 22 | default=0.7, 23 | help="Minimum score for the prediction to be shown", 24 | ) 25 | parser.add_argument( 26 | "--min-image-size", 27 | type=int, 28 | default=224, 29 | help="Smallest size of the image to feed to the model. " 30 | "Model was trained with 800, which gives best results", 31 | ) 32 | parser.add_argument( 33 | "--show-mask-heatmaps", 34 | dest="show_mask_heatmaps", 35 | help="Show a heatmap probability for the top masks-per-dim masks", 36 | action="store_true", 37 | ) 38 | parser.add_argument( 39 | "--masks-per-dim", 40 | type=int, 41 | default=2, 42 | help="Number of heatmaps per dimension to show", 43 | ) 44 | parser.add_argument( 45 | "opts", 46 | help="Modify model config options using the command-line", 47 | default=None, 48 | nargs=argparse.REMAINDER, 49 | ) 50 | 51 | args = parser.parse_args() 52 | 53 | # load config from file and command-line arguments 54 | cfg.merge_from_file(args.config_file) 55 | cfg.merge_from_list(args.opts) 56 | cfg.freeze() 57 | 58 | # prepare object that handles inference plus adds predictions on top of image 59 | coco_demo = COCODemo( 60 | cfg, 61 | confidence_threshold=args.confidence_threshold, 62 | show_mask_heatmaps=args.show_mask_heatmaps, 63 | masks_per_dim=args.masks_per_dim, 64 | min_image_size=args.min_image_size, 65 | ) 66 | 67 | cam = cv2.VideoCapture(0) 68 | while True: 69 | start_time = time.time() 70 | ret_val, img = cam.read() 71 | composite = coco_demo.run_on_opencv_image(img) 72 | print("Time: {:.2f} s / img".format(time.time() - start_time)) 73 | cv2.imshow("COCO detections", composite) 74 | if cv2.waitKey(1) == 27: 75 | break # esc to quit 76 | cv2.destroyAllWindows() 77 | 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CUDA="9.0" 2 | ARG CUDNN="7" 3 | 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04 5 | 6 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 7 | 8 | # install basics 9 | RUN apt-get update -y \ 10 | && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++ \ 11 | && apt-get install -y libglib2.0-0 libsm6 libxext6 libxrender-dev 12 | 13 | # Install Miniconda 14 | RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 15 | && chmod +x /miniconda.sh \ 16 | && /miniconda.sh -b -p /miniconda \ 17 | && rm /miniconda.sh 18 | 19 | ENV PATH=/miniconda/bin:$PATH 20 | 21 | # Create a Python 3.6 environment 22 | RUN /miniconda/bin/conda install -y conda-build \ 23 | && /miniconda/bin/conda create -y --name py36 python=3.6.7 \ 24 | && /miniconda/bin/conda clean -ya 25 | 26 | ENV CONDA_DEFAULT_ENV=py36 27 | ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV 28 | ENV PATH=$CONDA_PREFIX/bin:$PATH 29 | ENV CONDA_AUTO_UPDATE_CONDA=false 30 | 31 | RUN conda install -y ipython 32 | RUN pip install ninja yacs cython matplotlib opencv-python tqdm 33 | 34 | # Install PyTorch 1.0 Nightly 35 | ARG CUDA 36 | RUN conda install pytorch-nightly cudatoolkit=${CUDA} -c pytorch \ 37 | && conda clean -ya 38 | 39 | # Install TorchVision master 40 | RUN git clone https://github.com/pytorch/vision.git \ 41 | && cd vision \ 42 | && python setup.py install 43 | 44 | # install pycocotools 45 | RUN git clone https://github.com/cocodataset/cocoapi.git \ 46 | && cd cocoapi/PythonAPI \ 47 | && python setup.py build_ext install 48 | 49 | # install apex 50 | RUN git clone https://github.com/NVIDIA/apex.git \ 51 | && cd apex \ 52 | && python setup.py install --cuda_ext --cpp_ext 53 | 54 | # install PyTorch Detection 55 | ARG FORCE_CUDA="1" 56 | ENV FORCE_CUDA=${FORCE_CUDA} 57 | RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \ 58 | && cd maskrcnn-benchmark \ 59 | && python setup.py build develop 60 | 61 | WORKDIR /maskrcnn-benchmark 62 | -------------------------------------------------------------------------------- /docker/docker-jupyter/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CUDA="9.0" 2 | ARG CUDNN="7" 3 | 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04 5 | 6 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 7 | 8 | # install basics 9 | RUN apt-get update -y \ 10 | && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++ 11 | 12 | # Install Miniconda 13 | RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 14 | && chmod +x /miniconda.sh \ 15 | && /miniconda.sh -b -p /miniconda \ 16 | && rm /miniconda.sh 17 | 18 | ENV PATH=/miniconda/bin:$PATH 19 | 20 | # Create a Python 3.6 environment 21 | RUN /miniconda/bin/conda install -y conda-build \ 22 | && /miniconda/bin/conda create -y --name py36 python=3.6.7 \ 23 | && /miniconda/bin/conda clean -ya 24 | 25 | ENV CONDA_DEFAULT_ENV=py36 26 | ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV 27 | ENV PATH=$CONDA_PREFIX/bin:$PATH 28 | ENV CONDA_AUTO_UPDATE_CONDA=false 29 | 30 | RUN conda install -y ipython 31 | RUN pip install ninja yacs cython matplotlib jupyter 32 | 33 | # Install PyTorch 1.0 Nightly and OpenCV 34 | RUN conda install -y pytorch-nightly -c pytorch \ 35 | && conda install -y opencv -c menpo \ 36 | && conda clean -ya 37 | 38 | WORKDIR /root 39 | 40 | USER root 41 | 42 | RUN mkdir /notebooks 43 | 44 | WORKDIR /notebooks 45 | 46 | # Install TorchVision master 47 | RUN git clone https://github.com/pytorch/vision.git \ 48 | && cd vision \ 49 | && python setup.py install 50 | 51 | # install pycocotools 52 | RUN git clone https://github.com/cocodataset/cocoapi.git \ 53 | && cd cocoapi/PythonAPI \ 54 | && python setup.py build_ext install 55 | 56 | # install PyTorch Detection 57 | RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \ 58 | && cd maskrcnn-benchmark \ 59 | && python setup.py build develop 60 | 61 | RUN jupyter notebook --generate-config 62 | 63 | ENV CONFIG_PATH="/root/.jupyter/jupyter_notebook_config.py" 64 | 65 | COPY "jupyter_notebook_config.py" ${CONFIG_PATH} 66 | 67 | ENTRYPOINT ["sh", "-c", "jupyter notebook --allow-root -y --no-browser --ip=0.0.0.0 --config=${CONFIG_PATH}"] 68 | -------------------------------------------------------------------------------- /docker/docker-jupyter/jupyter_notebook_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from IPython.lib import passwd 3 | 4 | #c = c # pylint:disable=undefined-variable 5 | c = get_config() 6 | c.NotebookApp.ip = '0.0.0.0' 7 | c.NotebookApp.port = int(os.getenv('PORT', 8888)) 8 | c.NotebookApp.open_browser = False 9 | 10 | # sets a password if PASSWORD is set in the environment 11 | if 'PASSWORD' in os.environ: 12 | password = os.environ['PASSWORD'] 13 | if password: 14 | c.NotebookApp.password = passwd(password) 15 | else: 16 | c.NotebookApp.password = '' 17 | c.NotebookApp.token = '' 18 | del os.environ['PASSWORD'] 19 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .defaults import _C as cfg 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | const int sample_per_part, 56 | const float trans_std) 57 | { 58 | if (input.type().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "SigmoidFocalLoss.h" 6 | #include "deform_conv.h" 7 | #include "deform_pool.h" 8 | 9 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 10 | m.def("nms", &nms, "non-maximum suppression"); 11 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 12 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 13 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 14 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 15 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 16 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 17 | // dcn-v2 18 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 19 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 20 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 21 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 22 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 23 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 24 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 25 | m.def("deform_unfold_forward", &deform_unfold_forward, "deform unfold forward"); 26 | m.def("deform_unfold_backward_input", &deform_unfold_backward_input, "deform_unfold_backward_input"); 27 | } -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_data_loader 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.structures.image_list import to_image_list 3 | 4 | 5 | class BatchCollator(object): 6 | """ 7 | From a list of samples from the dataset, 8 | returns the batched images and targets. 9 | This should be passed to the DataLoader 10 | """ 11 | 12 | def __init__(self, size_divisible=0): 13 | self.size_divisible = size_divisible 14 | 15 | def __call__(self, batch): 16 | transposed_batch = list(zip(*batch)) 17 | images = to_image_list(transposed_batch[0], self.size_divisible) 18 | targets = transposed_batch[1] 19 | img_ids = transposed_batch[2] 20 | return images, targets, img_ids 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .coco import COCODataset 3 | from .voc import PascalVOCDataset 4 | from .concat_dataset import ConcatDataset 5 | 6 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset"] 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the sizes of the image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.data import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .voc import voc_evaluation 5 | 6 | 7 | def evaluate(dataset, predictions, output_folder, **kwargs): 8 | """evaluate dataset using different methods based on dataset type. 9 | Args: 10 | dataset: Dataset object 11 | predictions(list[BoxList]): each item in the list represents the 12 | prediction results for one image. 13 | output_folder: output folder, to save evaluation files or results. 14 | **kwargs: other args. 15 | Returns: 16 | evaluation result 17 | """ 18 | args = dict( 19 | dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs 20 | ) 21 | if isinstance(dataset, datasets.COCODataset): 22 | return coco_evaluation(**args) 23 | elif isinstance(dataset, datasets.PascalVOCDataset): 24 | return voc_evaluation(**args) 25 | else: 26 | dataset_name = dataset.__class__.__name__ 27 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 28 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation 2 | 3 | 4 | def coco_evaluation( 5 | dataset, 6 | predictions, 7 | output_folder, 8 | box_only, 9 | iou_types, 10 | expected_results, 11 | expected_results_sigma_tol, 12 | ): 13 | return do_coco_evaluation( 14 | dataset=dataset, 15 | predictions=predictions, 16 | box_only=box_only, 17 | output_folder=output_folder, 18 | iou_types=iou_types, 19 | expected_results=expected_results, 20 | expected_results_sigma_tol=expected_results_sigma_tol, 21 | ) 22 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .voc_eval import do_voc_evaluation 4 | 5 | 6 | def voc_evaluation(dataset, predictions, output_folder, box_only, **_): 7 | logger = logging.getLogger("maskrcnn_benchmark.inference") 8 | if box_only: 9 | logger.warning("voc evaluation doesn't support box_only, ignored.") 10 | logger.info("performing voc evaluation, ignored iou_types.") 11 | return do_voc_evaluation( 12 | dataset=dataset, 13 | predictions=predictions, 14 | output_folder=output_folder, 15 | logger=logger, 16 | ) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/list_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Simple dataset class that wraps a list of path names 4 | """ 5 | 6 | from PIL import Image 7 | 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList 9 | 10 | 11 | class ListDataset(object): 12 | def __init__(self, image_lists, transforms=None): 13 | self.image_lists = image_lists 14 | self.transforms = transforms 15 | 16 | def __getitem__(self, item): 17 | img = Image.open(self.image_lists[item]).convert("RGB") 18 | 19 | # dummy target 20 | w, h = img.size 21 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 22 | 23 | if self.transforms is not None: 24 | img, target = self.transforms(img, target) 25 | 26 | return img, target 27 | 28 | def __len__(self): 29 | return len(self.image_lists) 30 | 31 | def get_img_info(self, item): 32 | """ 33 | Return the image dimensions for the image, without 34 | loading and pre-processing it 35 | """ 36 | pass 37 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | It is especially useful in conjunction with 13 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 14 | process can pass a DistributedSampler instance as a DataLoader sampler, 15 | and load a subset of the original dataset that is exclusive to it. 16 | .. note:: 17 | Dataset is assumed to be of constant size. 18 | Arguments: 19 | dataset: Dataset used for sampling. 20 | num_replicas (optional): Number of processes participating in 21 | distributed training. 22 | rank (optional): Rank of the current process within num_replicas. 23 | """ 24 | 25 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 26 | if num_replicas is None: 27 | if not dist.is_available(): 28 | raise RuntimeError("Requires distributed package to be available") 29 | num_replicas = dist.get_world_size() 30 | if rank is None: 31 | if not dist.is_available(): 32 | raise RuntimeError("Requires distributed package to be available") 33 | rank = dist.get_rank() 34 | self.dataset = dataset 35 | self.num_replicas = num_replicas 36 | self.rank = rank 37 | self.epoch = 0 38 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 39 | self.total_size = self.num_samples * self.num_replicas 40 | self.shuffle = shuffle 41 | 42 | def __iter__(self): 43 | if self.shuffle: 44 | # deterministically shuffle based on epoch 45 | g = torch.Generator() 46 | g.manual_seed(self.epoch) 47 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 48 | else: 49 | indices = torch.arange(len(self.dataset)).tolist() 50 | 51 | # add extra samples to make it evenly divisible 52 | indices += indices[: (self.total_size - len(indices))] 53 | assert len(indices) == self.total_size 54 | 55 | # subsample 56 | offset = self.num_samples * self.rank 57 | indices = indices[offset : offset + self.num_samples] 58 | assert len(indices) == self.num_samples 59 | 60 | return iter(indices) 61 | 62 | def __len__(self): 63 | return self.num_samples 64 | 65 | def set_epoch(self, epoch): 66 | self.epoch = epoch 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | flip_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 10 | brightness = cfg.INPUT.BRIGHTNESS 11 | contrast = cfg.INPUT.CONTRAST 12 | saturation = cfg.INPUT.SATURATION 13 | hue = cfg.INPUT.HUE 14 | else: 15 | min_size = cfg.INPUT.MIN_SIZE_TEST 16 | max_size = cfg.INPUT.MAX_SIZE_TEST 17 | flip_prob = 0 18 | brightness = 0.0 19 | contrast = 0.0 20 | saturation = 0.0 21 | hue = 0.0 22 | 23 | to_bgr255 = cfg.INPUT.TO_BGR255 24 | normalize_transform = T.Normalize( 25 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 26 | ) 27 | color_jitter = T.ColorJitter( 28 | brightness=brightness, 29 | contrast=contrast, 30 | saturation=saturation, 31 | hue=hue, 32 | ) 33 | 34 | transform = T.Compose( 35 | [ 36 | color_jitter, 37 | T.Resize(min_size, max_size), 38 | T.RandomHorizontalFlip(flip_prob), 39 | T.ToTensor(), 40 | normalize_transform, 41 | ] 42 | ) 43 | return transform 44 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d 5 | from .misc import Conv2d 6 | from .misc import DFConv2d 7 | from .misc import ConvTranspose2d 8 | from .misc import BatchNorm2d 9 | from .misc import interpolate 10 | from .nms import nms 11 | from .roi_align import ROIAlign 12 | from .roi_align import roi_align 13 | from .roi_pool import ROIPool 14 | from .roi_pool import roi_pool 15 | from .smooth_l1_loss import smooth_l1_loss 16 | from .sigmoid_focal_loss import SigmoidFocalLoss 17 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 18 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, ModulatedDeformConvPack 19 | from .dcn.deform_pool_func import deform_roi_pooling 20 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack 21 | from .dcn.deform_unfold_func import deform_unfold 22 | from .dcn.deform_unfold_module import DeformUnfold 23 | from .dynamic_weight import DynamicWeightsCat11, DynamicWeightsCat33, ReDynamicWeightsCat33, ReDynamicWeightsCat11 24 | 25 | 26 | __all__ = [ 27 | "nms", 28 | "roi_align", 29 | "ROIAlign", 30 | "roi_pool", 31 | "ROIPool", 32 | "smooth_l1_loss", 33 | "Conv2d", 34 | "DFConv2d", 35 | "ConvTranspose2d", 36 | "interpolate", 37 | "BatchNorm2d", 38 | "FrozenBatchNorm2d", 39 | "SigmoidFocalLoss", 40 | 'deform_conv', 41 | 'modulated_deform_conv', 42 | 'DeformConv', 43 | 'ModulatedDeformConv', 44 | 'ModulatedDeformConvPack', 45 | 'deform_roi_pooling', 46 | 'DeformRoIPooling', 47 | 'DeformRoIPoolingPack', 48 | 'ModulatedDeformRoIPoolingPack', 49 | 'deform_unfold', 50 | 'DeformUnfold', 51 | 'DynamicWeightsCat11', 52 | 'DynamicWeightsCat33', 53 | 'ReDynamicWeightsCat33', 54 | 'ReDynamicWeightsCat11', 55 | 'DeformDGMN', 56 | # 'GloReLocalModule', 57 | ] 58 | 59 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import glob 3 | import os.path 4 | 5 | import torch 6 | 7 | try: 8 | from torch.utils.cpp_extension import load as load_ext 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | except ImportError: 11 | raise ImportError("The cpp layer extensions requires PyTorch 0.4 or higher") 12 | 13 | 14 | def _load_C_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | this_dir = os.path.dirname(this_dir) 17 | this_dir = os.path.join(this_dir, "csrc") 18 | 19 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 22 | 23 | source = main_file + source_cpu 24 | 25 | extra_cflags = [] 26 | if torch.cuda.is_available() and CUDA_HOME is not None: 27 | source.extend(source_cuda) 28 | extra_cflags = ["-DWITH_CUDA"] 29 | source = [os.path.join(this_dir, s) for s in source] 30 | extra_include_paths = [this_dir] 31 | return load_ext( 32 | "torchvision", 33 | source, 34 | extra_cflags=extra_cflags, 35 | extra_include_paths=extra_include_paths, 36 | ) 37 | 38 | 39 | _C = _load_C_extensions() 40 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/batch_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class FrozenBatchNorm2d(nn.Module): 7 | """ 8 | BatchNorm2d where the batch statistics and the affine parameters 9 | are fixed 10 | """ 11 | 12 | def __init__(self, n): 13 | super(FrozenBatchNorm2d, self).__init__() 14 | self.register_buffer("weight", torch.ones(n)) 15 | self.register_buffer("bias", torch.zeros(n)) 16 | self.register_buffer("running_mean", torch.zeros(n)) 17 | self.register_buffer("running_var", torch.ones(n)) 18 | 19 | def forward(self, x): 20 | # Cast all fixed parameters to half() if necessary 21 | if x.dtype == torch.float16: 22 | self.weight = self.weight.half() 23 | self.bias = self.bias.half() 24 | self.running_mean = self.running_mean.half() 25 | self.running_var = self.running_var.half() 26 | 27 | scale = self.weight * self.running_var.rsqrt() 28 | bias = self.bias - self.running_mean * scale 29 | scale = scale.reshape(1, -1, 1, 1) 30 | bias = bias.reshape(1, -1, 1, 1) 31 | return x * scale + bias 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn) 3 | # -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/dcn/deform_pool_func.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from maskrcnn_benchmark import _C 6 | 7 | 8 | class DeformRoIPoolingFunction(Function): 9 | 10 | @staticmethod 11 | def forward( 12 | ctx, 13 | data, 14 | rois, 15 | offset, 16 | spatial_scale, 17 | out_size, 18 | out_channels, 19 | no_trans, 20 | group_size=1, 21 | part_size=None, 22 | sample_per_part=4, 23 | trans_std=.0 24 | ): 25 | ctx.spatial_scale = spatial_scale 26 | ctx.out_size = out_size 27 | ctx.out_channels = out_channels 28 | ctx.no_trans = no_trans 29 | ctx.group_size = group_size 30 | ctx.part_size = out_size if part_size is None else part_size 31 | ctx.sample_per_part = sample_per_part 32 | ctx.trans_std = trans_std 33 | 34 | assert 0.0 <= ctx.trans_std <= 1.0 35 | if not data.is_cuda: 36 | raise NotImplementedError 37 | 38 | n = rois.shape[0] 39 | output = data.new_empty(n, out_channels, out_size, out_size) 40 | output_count = data.new_empty(n, out_channels, out_size, out_size) 41 | _C.deform_psroi_pooling_forward( 42 | data, 43 | rois, 44 | offset, 45 | output, 46 | output_count, 47 | ctx.no_trans, 48 | ctx.spatial_scale, 49 | ctx.out_channels, 50 | ctx.group_size, 51 | ctx.out_size, 52 | ctx.part_size, 53 | ctx.sample_per_part, 54 | ctx.trans_std 55 | ) 56 | 57 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 58 | ctx.save_for_backward(data, rois, offset) 59 | ctx.output_count = output_count 60 | 61 | return output 62 | 63 | @staticmethod 64 | @once_differentiable 65 | def backward(ctx, grad_output): 66 | if not grad_output.is_cuda: 67 | raise NotImplementedError 68 | 69 | data, rois, offset = ctx.saved_tensors 70 | output_count = ctx.output_count 71 | grad_input = torch.zeros_like(data) 72 | grad_rois = None 73 | grad_offset = torch.zeros_like(offset) 74 | 75 | _C.deform_psroi_pooling_backward( 76 | grad_output, 77 | data, 78 | rois, 79 | offset, 80 | output_count, 81 | grad_input, 82 | grad_offset, 83 | ctx.no_trans, 84 | ctx.spatial_scale, 85 | ctx.out_channels, 86 | ctx.group_size, 87 | ctx.out_size, 88 | ctx.part_size, 89 | ctx.sample_per_part, 90 | ctx.trans_std 91 | ) 92 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None) 93 | 94 | 95 | deform_roi_pooling = DeformRoIPoolingFunction.apply 96 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/dcn/deform_unfold_module.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.modules.utils import _pair 6 | 7 | from .deform_unfold_func import deform_unfold 8 | 9 | 10 | class DeformUnfold(nn.Module): 11 | 12 | def __init__(self, 13 | kernel_size, 14 | stride=1, 15 | padding=0, 16 | dilation=1, 17 | deformable_groups=1, 18 | bias=False): 19 | assert not bias 20 | super(DeformUnfold, self).__init__() 21 | 22 | self.kernel_size = _pair(kernel_size) 23 | self.stride = _pair(stride) 24 | self.padding = _pair(padding) 25 | self.dilation = _pair(dilation) 26 | self.deformable_groups = deformable_groups 27 | 28 | 29 | def forward(self, input, offset): 30 | return deform_unfold(input, offset, self.kernel_size, self.stride, 31 | self.padding, self.dilation, 32 | self.deformable_groups) 33 | 34 | def __repr__(self): 35 | return "".join([ 36 | "{}(".format(self.__class__.__name__), 37 | "kernel_size={}, ".format(self.kernel_size), 38 | "stride={}, ".format(self.stride), 39 | "dilation={}, ".format(self.dilation), 40 | "padding={}, ".format(self.padding), 41 | "deformable_groups={}, ".format(self.deformable_groups), 42 | ]) 43 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from maskrcnn_benchmark import _C 4 | 5 | from apex import amp 6 | 7 | # Only valid with fp32 inputs - give AMP the hint 8 | nms = amp.float_function(_C.nms) 9 | 10 | # nms.__doc__ = """ 11 | # This function performs Non-maximum suppresion""" 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | from apex import amp 11 | 12 | class _ROIAlign(Function): 13 | @staticmethod 14 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 15 | ctx.save_for_backward(roi) 16 | ctx.output_size = _pair(output_size) 17 | ctx.spatial_scale = spatial_scale 18 | ctx.sampling_ratio = sampling_ratio 19 | ctx.input_shape = input.size() 20 | output = _C.roi_align_forward( 21 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 22 | ) 23 | return output 24 | 25 | @staticmethod 26 | @once_differentiable 27 | def backward(ctx, grad_output): 28 | rois, = ctx.saved_tensors 29 | output_size = ctx.output_size 30 | spatial_scale = ctx.spatial_scale 31 | sampling_ratio = ctx.sampling_ratio 32 | bs, ch, h, w = ctx.input_shape 33 | grad_input = _C.roi_align_backward( 34 | grad_output, 35 | rois, 36 | spatial_scale, 37 | output_size[0], 38 | output_size[1], 39 | bs, 40 | ch, 41 | h, 42 | w, 43 | sampling_ratio, 44 | ) 45 | return grad_input, None, None, None, None 46 | 47 | 48 | roi_align = _ROIAlign.apply 49 | 50 | class ROIAlign(nn.Module): 51 | def __init__(self, output_size, spatial_scale, sampling_ratio): 52 | super(ROIAlign, self).__init__() 53 | self.output_size = output_size 54 | self.spatial_scale = spatial_scale 55 | self.sampling_ratio = sampling_ratio 56 | 57 | @amp.float_function 58 | def forward(self, input, rois): 59 | return roi_align( 60 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 61 | ) 62 | 63 | def __repr__(self): 64 | tmpstr = self.__class__.__name__ + "(" 65 | tmpstr += "output_size=" + str(self.output_size) 66 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 67 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 68 | tmpstr += ")" 69 | return tmpstr 70 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | from apex import amp 11 | 12 | class _ROIPool(Function): 13 | @staticmethod 14 | def forward(ctx, input, roi, output_size, spatial_scale): 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.input_shape = input.size() 18 | output, argmax = _C.roi_pool_forward( 19 | input, roi, spatial_scale, output_size[0], output_size[1] 20 | ) 21 | ctx.save_for_backward(input, roi, argmax) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | input, rois, argmax = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | bs, ch, h, w = ctx.input_shape 31 | grad_input = _C.roi_pool_backward( 32 | grad_output, 33 | input, 34 | rois, 35 | argmax, 36 | spatial_scale, 37 | output_size[0], 38 | output_size[1], 39 | bs, 40 | ch, 41 | h, 42 | w, 43 | ) 44 | return grad_input, None, None, None 45 | 46 | 47 | roi_pool = _ROIPool.apply 48 | 49 | 50 | class ROIPool(nn.Module): 51 | def __init__(self, output_size, spatial_scale): 52 | super(ROIPool, self).__init__() 53 | self.output_size = output_size 54 | self.spatial_scale = spatial_scale 55 | 56 | @amp.float_function 57 | def forward(self, input, rois): 58 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 59 | 60 | def __repr__(self): 61 | tmpstr = self.__class__.__name__ + "(" 62 | tmpstr += "output_size=" + str(self.output_size) 63 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 64 | tmpstr += ")" 65 | return tmpstr 66 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from maskrcnn_benchmark import _C 7 | 8 | # TODO: Use JIT to replace CUDA implementation in the future. 9 | class _SigmoidFocalLoss(Function): 10 | @staticmethod 11 | def forward(ctx, logits, targets, gamma, alpha): 12 | ctx.save_for_backward(logits, targets) 13 | num_classes = logits.shape[1] 14 | ctx.num_classes = num_classes 15 | ctx.gamma = gamma 16 | ctx.alpha = alpha 17 | 18 | losses = _C.sigmoid_focalloss_forward( 19 | logits, targets, num_classes, gamma, alpha 20 | ) 21 | return losses 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, d_loss): 26 | logits, targets = ctx.saved_tensors 27 | num_classes = ctx.num_classes 28 | gamma = ctx.gamma 29 | alpha = ctx.alpha 30 | d_loss = d_loss.contiguous() 31 | d_logits = _C.sigmoid_focalloss_backward( 32 | logits, targets, d_loss, num_classes, gamma, alpha 33 | ) 34 | return d_logits, None, None, None, None 35 | 36 | 37 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 38 | 39 | 40 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha): 41 | num_classes = logits.shape[1] 42 | gamma = gamma[0] 43 | alpha = alpha[0] 44 | dtype = targets.dtype 45 | device = targets.device 46 | class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0) 47 | 48 | t = targets.unsqueeze(1) 49 | p = torch.sigmoid(logits) 50 | term1 = (1 - p) ** gamma * torch.log(p) 51 | term2 = p ** gamma * torch.log(1 - p) 52 | return -(t == class_range).float() * term1 * alpha - ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 53 | 54 | 55 | class SigmoidFocalLoss(nn.Module): 56 | def __init__(self, gamma, alpha): 57 | super(SigmoidFocalLoss, self).__init__() 58 | self.gamma = gamma 59 | self.alpha = alpha 60 | 61 | def forward(self, logits, targets): 62 | device = logits.device 63 | if logits.is_cuda: 64 | loss_func = sigmoid_focal_loss_cuda 65 | else: 66 | loss_func = sigmoid_focal_loss_cpu 67 | 68 | loss = loss_func(logits, targets, self.gamma, self.alpha) 69 | return loss.sum() 70 | 71 | def __repr__(self): 72 | tmpstr = self.__class__.__name__ + "(" 73 | tmpstr += "gamma=" + str(self.gamma) 74 | tmpstr += ", alpha=" + str(self.alpha) 75 | tmpstr += ")" 76 | return tmpstr 77 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # TODO maybe push this to nn? 6 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 7 | """ 8 | very similar to the smooth_l1_loss from pytorch, but with 9 | the extra beta parameter 10 | """ 11 | n = torch.abs(input - target) 12 | cond = n < beta 13 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 14 | if size_average: 15 | return loss.mean() 16 | return loss.sum() 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/modeling/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .backbone import build_backbone 3 | from . import fbnet 4 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | 4 | from torch import nn 5 | 6 | from maskrcnn_benchmark.modeling import registry 7 | from maskrcnn_benchmark.modeling.make_layers import conv_with_kaiming_uniform 8 | from . import fpn as fpn_module 9 | from . import resnet 10 | 11 | 12 | @registry.BACKBONES.register("R-50-C4") 13 | @registry.BACKBONES.register("R-50-C5") 14 | @registry.BACKBONES.register("R-101-C4") 15 | @registry.BACKBONES.register("R-101-C5") 16 | def build_resnet_backbone(cfg): 17 | body = resnet.ResNet(cfg) 18 | model = nn.Sequential(OrderedDict([("body", body)])) 19 | model.out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 20 | return model 21 | 22 | 23 | @registry.BACKBONES.register("R-50-FPN") 24 | @registry.BACKBONES.register("R-101-FPN") 25 | @registry.BACKBONES.register("R-152-FPN") 26 | def build_resnet_fpn_backbone(cfg): 27 | body = resnet.ResNet(cfg) 28 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 29 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 30 | fpn = fpn_module.FPN( 31 | in_channels_list=[ 32 | in_channels_stage2, 33 | in_channels_stage2 * 2, 34 | in_channels_stage2 * 4, 35 | in_channels_stage2 * 8, 36 | ], 37 | out_channels=out_channels, 38 | conv_block=conv_with_kaiming_uniform( 39 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 40 | ), 41 | top_blocks=fpn_module.LastLevelMaxPool(), 42 | ) 43 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 44 | model.out_channels = out_channels 45 | return model 46 | 47 | 48 | @registry.BACKBONES.register("R-50-FPN-RETINANET") 49 | @registry.BACKBONES.register("R-101-FPN-RETINANET") 50 | def build_resnet_fpn_p3p7_backbone(cfg): 51 | body = resnet.ResNet(cfg) 52 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 53 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 54 | in_channels_p6p7 = in_channels_stage2 * 8 if cfg.MODEL.RETINANET.USE_C5 \ 55 | else out_channels 56 | fpn = fpn_module.FPN( 57 | in_channels_list=[ 58 | 0, 59 | in_channels_stage2 * 2, 60 | in_channels_stage2 * 4, 61 | in_channels_stage2 * 8, 62 | ], 63 | out_channels=out_channels, 64 | conv_block=conv_with_kaiming_uniform( 65 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 66 | ), 67 | top_blocks=fpn_module.LastLevelP6P7(in_channels_p6p7, out_channels), 68 | ) 69 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 70 | model.out_channels = out_channels 71 | return model 72 | 73 | 74 | def build_backbone(cfg): 75 | assert cfg.MODEL.BACKBONE.CONV_BODY in registry.BACKBONES, \ 76 | "cfg.MODEL.BACKBONE.CONV_BODY: {} are not registered in registry".format( 77 | cfg.MODEL.BACKBONE.CONV_BODY 78 | ) 79 | return registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY](cfg) 80 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class BalancedPositiveNegativeSampler(object): 6 | """ 7 | This class samples batches, ensuring that they contain a fixed proportion of positives 8 | """ 9 | 10 | def __init__(self, batch_size_per_image, positive_fraction): 11 | """ 12 | Arguments: 13 | batch_size_per_image (int): number of elements to be selected per image 14 | positive_fraction (float): percentace of positive elements per batch 15 | """ 16 | self.batch_size_per_image = batch_size_per_image 17 | self.positive_fraction = positive_fraction 18 | 19 | def __call__(self, matched_idxs): 20 | """ 21 | Arguments: 22 | matched idxs: list of tensors containing -1, 0 or positive values. 23 | Each tensor corresponds to a specific image. 24 | -1 values are ignored, 0 are considered as negatives and > 0 as 25 | positives. 26 | 27 | Returns: 28 | pos_idx (list[tensor]) 29 | neg_idx (list[tensor]) 30 | 31 | Returns two lists of binary masks for each image. 32 | The first list contains the positive elements that were selected, 33 | and the second list the negative example. 34 | """ 35 | pos_idx = [] 36 | neg_idx = [] 37 | for matched_idxs_per_image in matched_idxs: 38 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 39 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 40 | 41 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = self.batch_size_per_image - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx_per_image = positive[perm1] 53 | neg_idx_per_image = negative[perm2] 54 | 55 | # create binary mask from indices 56 | pos_idx_per_image_mask = torch.zeros_like( 57 | matched_idxs_per_image, dtype=torch.uint8 58 | ) 59 | neg_idx_per_image_mask = torch.zeros_like( 60 | matched_idxs_per_image, dtype=torch.uint8 61 | ) 62 | pos_idx_per_image_mask[pos_idx_per_image] = 1 63 | neg_idx_per_image_mask[neg_idx_per_image] = 1 64 | 65 | pos_idx.append(pos_idx_per_image_mask) 66 | neg_idx.append(neg_idx_per_image_mask) 67 | 68 | return pos_idx, neg_idx 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .detectors import build_detection_model 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/detectors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .generalized_rcnn import GeneralizedRCNN 3 | 4 | 5 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN} 6 | 7 | 8 | def build_detection_model(cfg): 9 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 10 | return meta_arch(cfg) 11 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/generalized_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Implements the Generalized R-CNN framework 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | 9 | from maskrcnn_benchmark.structures.image_list import to_image_list 10 | 11 | from ..backbone import build_backbone 12 | from ..rpn.rpn import build_rpn 13 | from ..roi_heads.roi_heads import build_roi_heads 14 | 15 | 16 | class GeneralizedRCNN(nn.Module): 17 | """ 18 | Main class for Generalized R-CNN. Currently supports boxes and masks. 19 | It consists of three main parts: 20 | - backbone 21 | - rpn 22 | - heads: takes the features + the proposals from the RPN and computes 23 | detections / masks from it. 24 | """ 25 | 26 | def __init__(self, cfg): 27 | super(GeneralizedRCNN, self).__init__() 28 | 29 | self.backbone = build_backbone(cfg) 30 | self.rpn = build_rpn(cfg, self.backbone.out_channels) 31 | self.roi_heads = build_roi_heads(cfg, self.backbone.out_channels) 32 | 33 | def forward(self, images, targets=None): 34 | """ 35 | Arguments: 36 | images (list[Tensor] or ImageList): images to be processed 37 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 38 | 39 | Returns: 40 | result (list[BoxList] or dict[Tensor]): the output from the model. 41 | During training, it returns a dict[Tensor] which contains the losses. 42 | During testing, it returns list[BoxList] contains additional fields 43 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 44 | 45 | """ 46 | if self.training and targets is None: 47 | raise ValueError("In training mode, targets should be passed") 48 | images = to_image_list(images) 49 | features = self.backbone(images.tensors) 50 | proposals, proposal_losses = self.rpn(images, features, targets) 51 | if self.roi_heads: 52 | x, result, detector_losses = self.roi_heads(features, proposals, targets) 53 | else: 54 | # RPN-only models don't have roi_heads 55 | x = features 56 | result = proposals 57 | detector_losses = {} 58 | 59 | if self.training: 60 | losses = {} 61 | losses.update(detector_losses) 62 | losses.update(proposal_losses) 63 | return losses 64 | 65 | return result 66 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from maskrcnn_benchmark.utils.registry import Registry 4 | 5 | BACKBONES = Registry() 6 | RPN_HEADS = Registry() 7 | ROI_BOX_FEATURE_EXTRACTORS = Registry() 8 | ROI_BOX_PREDICTOR = Registry() 9 | ROI_KEYPOINT_FEATURE_EXTRACTORS = Registry() 10 | ROI_KEYPOINT_PREDICTOR = Registry() 11 | ROI_MASK_FEATURE_EXTRACTORS = Registry() 12 | ROI_MASK_PREDICTOR = Registry() 13 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/modeling/roi_heads/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 6 | from .roi_box_predictors import make_roi_box_predictor 7 | from .inference import make_roi_box_post_processor 8 | from .loss import make_roi_box_loss_evaluator 9 | 10 | 11 | class ROIBoxHead(torch.nn.Module): 12 | """ 13 | Generic Box Head class. 14 | """ 15 | 16 | def __init__(self, cfg, in_channels): 17 | super(ROIBoxHead, self).__init__() 18 | self.feature_extractor = make_roi_box_feature_extractor(cfg, in_channels) 19 | self.predictor = make_roi_box_predictor( 20 | cfg, self.feature_extractor.out_channels) 21 | self.post_processor = make_roi_box_post_processor(cfg) 22 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 23 | 24 | def forward(self, features, proposals, targets=None): 25 | """ 26 | Arguments: 27 | features (list[Tensor]): feature-maps from possibly several levels 28 | proposals (list[BoxList]): proposal boxes 29 | targets (list[BoxList], optional): the ground-truth targets. 30 | 31 | Returns: 32 | x (Tensor): the result of the feature extractor 33 | proposals (list[BoxList]): during training, the subsampled proposals 34 | are returned. During testing, the predicted boxlists are returned 35 | losses (dict[Tensor]): During training, returns the losses for the 36 | head. During testing, returns an empty dict. 37 | """ 38 | 39 | if self.training: 40 | # Faster R-CNN subsamples during training the proposals with a fixed 41 | # positive / negative ratio 42 | with torch.no_grad(): 43 | proposals = self.loss_evaluator.subsample(proposals, targets) 44 | 45 | # extract features that will be fed to the final classifier. The 46 | # feature_extractor generally corresponds to the pooler + heads 47 | x = self.feature_extractor(features, proposals) 48 | # final classifier that converts the features into predictions 49 | class_logits, box_regression = self.predictor(x) 50 | 51 | if not self.training: 52 | result = self.post_processor((class_logits, box_regression), proposals) 53 | return x, result, {} 54 | 55 | loss_classifier, loss_box_reg = self.loss_evaluator( 56 | [class_logits], [box_regression] 57 | ) 58 | return ( 59 | x, 60 | proposals, 61 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 62 | ) 63 | 64 | 65 | def build_roi_box_head(cfg, in_channels): 66 | """ 67 | Constructs a new box head. 68 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 69 | and make it a parameter in the config 70 | """ 71 | return ROIBoxHead(cfg, in_channels) 72 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.modeling import registry 3 | from torch import nn 4 | 5 | 6 | @registry.ROI_BOX_PREDICTOR.register("FastRCNNPredictor") 7 | class FastRCNNPredictor(nn.Module): 8 | def __init__(self, config, in_channels): 9 | super(FastRCNNPredictor, self).__init__() 10 | assert in_channels is not None 11 | 12 | num_inputs = in_channels 13 | 14 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | self.avgpool = nn.AdaptiveAvgPool2d(1) 16 | self.cls_score = nn.Linear(num_inputs, num_classes) 17 | num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 18 | self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4) 19 | 20 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 21 | nn.init.constant_(self.cls_score.bias, 0) 22 | 23 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 24 | nn.init.constant_(self.bbox_pred.bias, 0) 25 | 26 | def forward(self, x): 27 | x = self.avgpool(x) 28 | x = x.view(x.size(0), -1) 29 | cls_logit = self.cls_score(x) 30 | bbox_pred = self.bbox_pred(x) 31 | return cls_logit, bbox_pred 32 | 33 | 34 | @registry.ROI_BOX_PREDICTOR.register("FPNPredictor") 35 | class FPNPredictor(nn.Module): 36 | def __init__(self, cfg, in_channels): 37 | super(FPNPredictor, self).__init__() 38 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 39 | representation_size = in_channels 40 | 41 | self.cls_score = nn.Linear(representation_size, num_classes) 42 | num_bbox_reg_classes = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 43 | self.bbox_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4) 44 | 45 | nn.init.normal_(self.cls_score.weight, std=0.01) 46 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 47 | for l in [self.cls_score, self.bbox_pred]: 48 | nn.init.constant_(l.bias, 0) 49 | 50 | def forward(self, x): 51 | if x.ndimension() == 4: 52 | assert list(x.shape[2:]) == [1, 1] 53 | x = x.view(x.size(0), -1) 54 | scores = self.cls_score(x) 55 | bbox_deltas = self.bbox_pred(x) 56 | 57 | return scores, bbox_deltas 58 | 59 | 60 | def make_roi_box_predictor(cfg, in_channels): 61 | func = registry.ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 62 | return func(cfg, in_channels) 63 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor 4 | from .roi_keypoint_predictors import make_roi_keypoint_predictor 5 | from .inference import make_roi_keypoint_post_processor 6 | from .loss import make_roi_keypoint_loss_evaluator 7 | 8 | 9 | class ROIKeypointHead(torch.nn.Module): 10 | def __init__(self, cfg, in_channels): 11 | super(ROIKeypointHead, self).__init__() 12 | self.cfg = cfg.clone() 13 | self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels) 14 | self.predictor = make_roi_keypoint_predictor( 15 | cfg, self.feature_extractor.out_channels) 16 | self.post_processor = make_roi_keypoint_post_processor(cfg) 17 | self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg) 18 | 19 | def forward(self, features, proposals, targets=None): 20 | """ 21 | Arguments: 22 | features (list[Tensor]): feature-maps from possibly several levels 23 | proposals (list[BoxList]): proposal boxes 24 | targets (list[BoxList], optional): the ground-truth targets. 25 | 26 | Returns: 27 | x (Tensor): the result of the feature extractor 28 | proposals (list[BoxList]): during training, the original proposals 29 | are returned. During testing, the predicted boxlists are returned 30 | with the `mask` field set 31 | losses (dict[Tensor]): During training, returns the losses for the 32 | head. During testing, returns an empty dict. 33 | """ 34 | if self.training: 35 | with torch.no_grad(): 36 | proposals = self.loss_evaluator.subsample(proposals, targets) 37 | 38 | x = self.feature_extractor(features, proposals) 39 | kp_logits = self.predictor(x) 40 | 41 | if not self.training: 42 | result = self.post_processor(kp_logits, proposals) 43 | return x, result, {} 44 | 45 | loss_kp = self.loss_evaluator(proposals, kp_logits) 46 | 47 | return x, proposals, dict(loss_kp=loss_kp) 48 | 49 | 50 | def build_roi_keypoint_head(cfg, in_channels): 51 | return ROIKeypointHead(cfg, in_channels) 52 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from maskrcnn_benchmark.modeling import registry 5 | from maskrcnn_benchmark.modeling.poolers import Pooler 6 | 7 | from maskrcnn_benchmark.layers import Conv2d 8 | 9 | 10 | @registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor") 11 | class KeypointRCNNFeatureExtractor(nn.Module): 12 | def __init__(self, cfg, in_channels): 13 | super(KeypointRCNNFeatureExtractor, self).__init__() 14 | 15 | resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION 16 | scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES 17 | sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO 18 | pooler = Pooler( 19 | output_size=(resolution, resolution), 20 | scales=scales, 21 | sampling_ratio=sampling_ratio, 22 | ) 23 | self.pooler = pooler 24 | 25 | input_features = in_channels 26 | layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS 27 | next_feature = input_features 28 | self.blocks = [] 29 | for layer_idx, layer_features in enumerate(layers, 1): 30 | layer_name = "conv_fcn{}".format(layer_idx) 31 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 32 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 33 | nn.init.constant_(module.bias, 0) 34 | self.add_module(layer_name, module) 35 | next_feature = layer_features 36 | self.blocks.append(layer_name) 37 | self.out_channels = layer_features 38 | 39 | def forward(self, x, proposals): 40 | x = self.pooler(x, proposals) 41 | for layer_name in self.blocks: 42 | x = F.relu(getattr(self, layer_name)(x)) 43 | return x 44 | 45 | 46 | def make_roi_keypoint_feature_extractor(cfg, in_channels): 47 | func = registry.ROI_KEYPOINT_FEATURE_EXTRACTORS[ 48 | cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR 49 | ] 50 | return func(cfg, in_channels) 51 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from maskrcnn_benchmark import layers 4 | from maskrcnn_benchmark.modeling import registry 5 | 6 | 7 | @registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor") 8 | class KeypointRCNNPredictor(nn.Module): 9 | def __init__(self, cfg, in_channels): 10 | super(KeypointRCNNPredictor, self).__init__() 11 | input_features = in_channels 12 | num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES 13 | deconv_kernel = 4 14 | self.kps_score_lowres = layers.ConvTranspose2d( 15 | input_features, 16 | num_keypoints, 17 | deconv_kernel, 18 | stride=2, 19 | padding=deconv_kernel // 2 - 1, 20 | ) 21 | nn.init.kaiming_normal_( 22 | self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu" 23 | ) 24 | nn.init.constant_(self.kps_score_lowres.bias, 0) 25 | self.up_scale = 2 26 | self.out_channels = num_keypoints 27 | 28 | def forward(self, x): 29 | x = self.kps_score_lowres(x) 30 | x = layers.interpolate( 31 | x, scale_factor=self.up_scale, mode="bilinear", align_corners=False 32 | ) 33 | return x 34 | 35 | 36 | def make_roi_keypoint_predictor(cfg, in_channels): 37 | func = registry.ROI_KEYPOINT_PREDICTOR[cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR] 38 | return func(cfg, in_channels) 39 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 6 | from maskrcnn_benchmark.modeling import registry 7 | from maskrcnn_benchmark.modeling.poolers import Pooler 8 | from maskrcnn_benchmark.modeling.make_layers import make_conv3x3 9 | 10 | 11 | registry.ROI_MASK_FEATURE_EXTRACTORS.register( 12 | "ResNet50Conv5ROIFeatureExtractor", ResNet50Conv5ROIFeatureExtractor 13 | ) 14 | 15 | 16 | @registry.ROI_MASK_FEATURE_EXTRACTORS.register("MaskRCNNFPNFeatureExtractor") 17 | class MaskRCNNFPNFeatureExtractor(nn.Module): 18 | """ 19 | Heads for FPN for classification 20 | """ 21 | 22 | def __init__(self, cfg, in_channels): 23 | """ 24 | Arguments: 25 | num_classes (int): number of output classes 26 | input_size (int): number of channels of the input once it's flattened 27 | representation_size (int): size of the intermediate representation 28 | """ 29 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 30 | 31 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 32 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 33 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 34 | pooler = Pooler( 35 | output_size=(resolution, resolution), 36 | scales=scales, 37 | sampling_ratio=sampling_ratio, 38 | ) 39 | input_size = in_channels 40 | self.pooler = pooler 41 | 42 | use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN 43 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 44 | dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION 45 | 46 | next_feature = input_size 47 | self.blocks = [] 48 | for layer_idx, layer_features in enumerate(layers, 1): 49 | layer_name = "mask_fcn{}".format(layer_idx) 50 | module = make_conv3x3( 51 | next_feature, layer_features, 52 | dilation=dilation, stride=1, use_gn=use_gn 53 | ) 54 | self.add_module(layer_name, module) 55 | next_feature = layer_features 56 | self.blocks.append(layer_name) 57 | self.out_channels = layer_features 58 | 59 | def forward(self, x, proposals): 60 | x = self.pooler(x, proposals) 61 | 62 | for layer_name in self.blocks: 63 | x = F.relu(getattr(self, layer_name)(x)) 64 | 65 | return x 66 | 67 | 68 | def make_roi_mask_feature_extractor(cfg, in_channels): 69 | func = registry.ROI_MASK_FEATURE_EXTRACTORS[ 70 | cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR 71 | ] 72 | return func(cfg, in_channels) 73 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | from maskrcnn_benchmark.layers import ConvTranspose2d 7 | from maskrcnn_benchmark.modeling import registry 8 | 9 | 10 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNC4Predictor") 11 | class MaskRCNNC4Predictor(nn.Module): 12 | def __init__(self, cfg, in_channels): 13 | super(MaskRCNNC4Predictor, self).__init__() 14 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 16 | num_inputs = in_channels 17 | 18 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 19 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 20 | 21 | for name, param in self.named_parameters(): 22 | if "bias" in name: 23 | nn.init.constant_(param, 0) 24 | elif "weight" in name: 25 | # Caffe2 implementation uses MSRAFill, which in fact 26 | # corresponds to kaiming_normal_ in PyTorch 27 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 28 | 29 | def forward(self, x): 30 | x = F.relu(self.conv5_mask(x)) 31 | return self.mask_fcn_logits(x) 32 | 33 | 34 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNConv1x1Predictor") 35 | class MaskRCNNConv1x1Predictor(nn.Module): 36 | def __init__(self, cfg, in_channels): 37 | super(MaskRCNNConv1x1Predictor, self).__init__() 38 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 39 | num_inputs = in_channels 40 | 41 | self.mask_fcn_logits = Conv2d(num_inputs, num_classes, 1, 1, 0) 42 | 43 | for name, param in self.named_parameters(): 44 | if "bias" in name: 45 | nn.init.constant_(param, 0) 46 | elif "weight" in name: 47 | # Caffe2 implementation uses MSRAFill, which in fact 48 | # corresponds to kaiming_normal_ in PyTorch 49 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 50 | 51 | def forward(self, x): 52 | return self.mask_fcn_logits(x) 53 | 54 | 55 | def make_roi_mask_predictor(cfg, in_channels): 56 | func = registry.ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 57 | return func(cfg, in_channels) 58 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Utility functions minipulating the prediction layers 4 | """ 5 | 6 | from ..utils import cat 7 | 8 | import torch 9 | 10 | def permute_and_flatten(layer, N, A, C, H, W): 11 | layer = layer.view(N, -1, C, H, W) 12 | layer = layer.permute(0, 3, 4, 1, 2) 13 | layer = layer.reshape(N, -1, C) 14 | return layer 15 | 16 | 17 | def concat_box_prediction_layers(box_cls, box_regression): 18 | box_cls_flattened = [] 19 | box_regression_flattened = [] 20 | # for each feature level, permute the outputs to make them be in the 21 | # same format as the labels. Note that the labels are computed for 22 | # all feature levels concatenated, so we keep the same representation 23 | # for the objectness and the box_regression 24 | for box_cls_per_level, box_regression_per_level in zip( 25 | box_cls, box_regression 26 | ): 27 | N, AxC, H, W = box_cls_per_level.shape 28 | Ax4 = box_regression_per_level.shape[1] 29 | A = Ax4 // 4 30 | C = AxC // A 31 | box_cls_per_level = permute_and_flatten( 32 | box_cls_per_level, N, A, C, H, W 33 | ) 34 | box_cls_flattened.append(box_cls_per_level) 35 | 36 | box_regression_per_level = permute_and_flatten( 37 | box_regression_per_level, N, A, 4, H, W 38 | ) 39 | box_regression_flattened.append(box_regression_per_level) 40 | # concatenate on the first dimension (representing the feature levels), to 41 | # take into account the way the labels were generated (with all feature maps 42 | # being concatenated as well) 43 | box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C) 44 | box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4) 45 | return box_cls, box_regression 46 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_optimizer 3 | from .build import make_lr_scheduler 4 | from .lr_scheduler import WarmupMultiStepLR 5 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .lr_scheduler import WarmupMultiStepLR 5 | 6 | 7 | def make_optimizer(cfg, model): 8 | params = [] 9 | for key, value in model.named_parameters(): 10 | if not value.requires_grad: 11 | continue 12 | lr = cfg.SOLVER.BASE_LR 13 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 14 | if "bias" in key: 15 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 16 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 17 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 18 | 19 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM) 20 | return optimizer 21 | 22 | 23 | def make_lr_scheduler(cfg, optimizer): 24 | return WarmupMultiStepLR( 25 | optimizer, 26 | cfg.SOLVER.STEPS, 27 | cfg.SOLVER.GAMMA, 28 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 29 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 30 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 31 | ) 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | 4 | import torch 5 | 6 | 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 8 | # separating MultiStepLR with WarmupLR 9 | # but the current LRScheduler design doesn't allow it 10 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer, 14 | milestones, 15 | gamma=0.1, 16 | warmup_factor=1.0 / 3, 17 | warmup_iters=500, 18 | warmup_method="linear", 19 | last_epoch=-1, 20 | ): 21 | if not list(milestones) == sorted(milestones): 22 | raise ValueError( 23 | "Milestones should be a list of" " increasing integers. Got {}", 24 | milestones, 25 | ) 26 | 27 | if warmup_method not in ("constant", "linear"): 28 | raise ValueError( 29 | "Only 'constant' or 'linear' warmup_method accepted" 30 | "got {}".format(warmup_method) 31 | ) 32 | self.milestones = milestones 33 | self.gamma = gamma 34 | self.warmup_factor = warmup_factor 35 | self.warmup_iters = warmup_iters 36 | self.warmup_method = warmup_method 37 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 38 | 39 | def get_lr(self): 40 | warmup_factor = 1 41 | if self.last_epoch < self.warmup_iters: 42 | if self.warmup_method == "constant": 43 | warmup_factor = self.warmup_factor 44 | elif self.warmup_method == "linear": 45 | alpha = float(self.last_epoch) / self.warmup_iters 46 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 47 | return [ 48 | base_lr 49 | * warmup_factor 50 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 51 | for base_lr in self.base_lrs 52 | ] 53 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/structures/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 33 | When tensors is an iterable of Tensors, it pads 34 | the Tensors with zeros so that they have the same 35 | shape 36 | """ 37 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 38 | tensors = [tensors] 39 | 40 | if isinstance(tensors, ImageList): 41 | return tensors 42 | elif isinstance(tensors, torch.Tensor): 43 | # single tensor shape can be inferred 44 | if tensors.dim() == 3: 45 | tensors = tensors[None] 46 | assert tensors.dim() == 4 47 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 48 | return ImageList(tensors, image_sizes) 49 | elif isinstance(tensors, (tuple, list)): 50 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 51 | 52 | # TODO Ideally, just remove this and let me model handle arbitrary 53 | # input sizs 54 | if size_divisible > 0: 55 | import math 56 | 57 | stride = size_divisible 58 | max_size = list(max_size) 59 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 60 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 61 | max_size = tuple(max_size) 62 | 63 | batch_shape = (len(tensors),) + max_size 64 | batched_imgs = tensors[0].new(*batch_shape).zero_() 65 | for img, pad_img in zip(tensors, batched_imgs): 66 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 67 | 68 | image_sizes = [im.shape[-2:] for im in tensors] 69 | 70 | return ImageList(batched_imgs, image_sizes) 71 | else: 72 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 73 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contain utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/utils/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import PIL 3 | 4 | from torch.utils.collect_env import get_pretty_env_info 5 | 6 | 7 | def get_pil_version(): 8 | return "\n Pillow ({})".format(PIL.__version__) 9 | 10 | 11 | def collect_env_info(): 12 | env_str = get_pretty_env_info() 13 | env_str += get_pil_version() 14 | return env_str 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for cv2 utility functions and maintaining version compatibility 3 | between 3.x and 4.x 4 | """ 5 | import cv2 6 | 7 | 8 | def findContours(*args, **kwargs): 9 | """ 10 | Wraps cv2.findContours to maintain compatiblity between versions 11 | 3 and 4 12 | 13 | Returns: 14 | contours, hierarchy 15 | """ 16 | if cv2.__version__.startswith('4'): 17 | contours, hierarchy = cv2.findContours(*args, **kwargs) 18 | elif cv2.__version__.startswith('3'): 19 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 20 | else: 21 | raise AssertionError( 22 | 'cv2 must be either version 3 or 4 to call this method') 23 | 24 | return contours, hierarchy 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | 4 | from maskrcnn_benchmark.utils.imports import import_file 5 | 6 | 7 | def setup_environment(): 8 | """Perform environment setup work. The default setup is a no-op, but this 9 | function allows the user to specify a Python source file that performs 10 | custom setup work that may be necessary to their computing environment. 11 | """ 12 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 13 | if custom_module_path: 14 | setup_custom_environment(custom_module_path) 15 | else: 16 | # The default setup is a no-op 17 | pass 18 | 19 | 20 | def setup_custom_environment(custom_module_path): 21 | """Load custom environment setup from a Python source file and run the setup 22 | function. 23 | """ 24 | module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) 25 | assert hasattr(module, "setup_environment") and callable( 26 | module.setup_environment 27 | ), ( 28 | "Custom environment module defined in {} does not have the " 29 | "required callable attribute 'setup_environment'." 30 | ).format( 31 | custom_module_path 32 | ) 33 | module.setup_environment() 34 | 35 | 36 | # Force environment setup when this module is imported 37 | setup_environment() 38 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/imports.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | if torch._six.PY3: 5 | import importlib 6 | import importlib.util 7 | import sys 8 | 9 | 10 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 11 | def import_file(module_name, file_path, make_importable=False): 12 | spec = importlib.util.spec_from_file_location(module_name, file_path) 13 | module = importlib.util.module_from_spec(spec) 14 | spec.loader.exec_module(module) 15 | if make_importable: 16 | sys.modules[module_name] = module 17 | return module 18 | else: 19 | import imp 20 | 21 | def import_file(module_name, file_path, make_importable=None): 22 | module = imp.load_source(module_name, file_path) 23 | return module 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | 7 | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | ch = logging.StreamHandler(stream=sys.stdout) 14 | ch.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 16 | ch.setFormatter(formatter) 17 | logger.addHandler(ch) 18 | 19 | if save_dir: 20 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 21 | fh.setLevel(logging.DEBUG) 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | 25 | return logger 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | 7 | 8 | class SmoothedValue(object): 9 | """Track a series of values and provide access to smoothed values over a 10 | window or the global series average. 11 | """ 12 | 13 | def __init__(self, window_size=20): 14 | self.deque = deque(maxlen=window_size) 15 | self.series = [] 16 | self.total = 0.0 17 | self.count = 0 18 | 19 | def update(self, value): 20 | self.deque.append(value) 21 | self.series.append(value) 22 | self.count += 1 23 | self.total += value 24 | 25 | @property 26 | def median(self): 27 | d = torch.tensor(list(self.deque)) 28 | return d.median().item() 29 | 30 | @property 31 | def avg(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.mean().item() 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger(object): 41 | def __init__(self, delimiter="\t"): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | if attr in self.__dict__: 56 | return self.__dict__[attr] 57 | raise AttributeError("'{}' object has no attribute '{}'".format( 58 | type(self).__name__, attr)) 59 | 60 | def __str__(self): 61 | loss_str = [] 62 | for name, meter in self.meters.items(): 63 | loss_str.append( 64 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 65 | ) 66 | return self.delimiter.join(loss_str) 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import os 4 | 5 | 6 | def mkdir(path): 7 | try: 8 | os.makedirs(path) 9 | except OSError as e: 10 | if e.errno != errno.EEXIST: 11 | raise 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | def _register_generic(module_dict, module_name, module): 5 | assert module_name not in module_dict 6 | module_dict[module_name] = module 7 | 8 | 9 | class Registry(dict): 10 | ''' 11 | A helper class for managing registering modules, it extends a dictionary 12 | and provides a register functions. 13 | 14 | Eg. creeting a registry: 15 | some_registry = Registry({"default": default_module}) 16 | 17 | There're two ways of registering new modules: 18 | 1): normal way is just calling register function: 19 | def foo(): 20 | ... 21 | some_registry.register("foo_module", foo) 22 | 2): used as decorator when declaring the module: 23 | @some_registry.register("foo_module") 24 | @some_registry.register("foo_modeul_nickname") 25 | def foo(): 26 | ... 27 | 28 | Access of module is just like using a dictionary, eg: 29 | f = some_registry["foo_modeul"] 30 | ''' 31 | def __init__(self, *args, **kwargs): 32 | super(Registry, self).__init__(*args, **kwargs) 33 | 34 | def register(self, module_name, module=None): 35 | # used as function call 36 | if module is not None: 37 | _register_generic(self, module_name, module) 38 | return 39 | 40 | # used as decorator 41 | def register_fn(fn): 42 | _register_generic(self, module_name, fn) 43 | return fn 44 | 45 | return register_fn 46 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | import time 5 | import datetime 6 | 7 | 8 | class Timer(object): 9 | def __init__(self): 10 | self.reset() 11 | 12 | @property 13 | def average_time(self): 14 | return self.total_time / self.calls if self.calls > 0 else 0.0 15 | 16 | def tic(self): 17 | # using time.time instead of time.clock because time time.clock 18 | # does not normalize for multithreading 19 | self.start_time = time.time() 20 | 21 | def toc(self, average=True): 22 | self.add(time.time() - self.start_time) 23 | if average: 24 | return self.average_time 25 | else: 26 | return self.diff 27 | 28 | def add(self, time_diff): 29 | self.diff = time_diff 30 | self.total_time += self.diff 31 | self.calls += 1 32 | 33 | def reset(self): 34 | self.total_time = 0.0 35 | self.calls = 0 36 | self.start_time = 0.0 37 | self.diff = 0.0 38 | 39 | def avg_time_str(self): 40 | time_str = str(datetime.timedelta(seconds=self.average_time)) 41 | return time_str 42 | 43 | 44 | def get_time_str(time_diff): 45 | time_str = str(datetime.timedelta(seconds=time_diff)) 46 | return time_str 47 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ninja 2 | yacs 3 | cython 4 | matplotlib 5 | tqdm 6 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #export NGPUS=8 4 | #python -m torch.distributed.launch --nproc_per_node=$NGPUS ./tools/train_net.py --config-file "./configs/dw/e2e_mask_rcnn_R_50_FPN_2x.yaml" 5 | 6 | 7 | export NGPUS=8 8 | python -m torch.distributed.launch --nproc_per_node=$NGPUS ./tools/train_net.py --config-file "./configs/dw/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml" 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "maskrcnn_benchmark._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="maskrcnn_benchmark", 61 | version="0.1", 62 | author="fmassa", 63 | url="https://github.com/facebookresearch/maskrcnn-benchmark", 64 | description="object detection in pytorch", 65 | packages=find_packages(exclude=("configs", "tests",)), 66 | # install_requires=requirements, 67 | ext_modules=get_extensions(), 68 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 69 | ) 70 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: 3 | # ./test_single_model_1gpu_totalbs1.sh 4 | export NGPUS=4 5 | for i in 0002500 0005000 0007500 0010000 0012500 0015000 0017500 0020000 0022500 0025000 0027500 0030000 0032500 0035000 0037500 0040000 0042500 0045000 0047500 0050000 0052500 0055000 0057500 0060000 0062500 0065000 0067500 0070000 0072500 0075000 0077500 0080000 0082500 0085000 0087500 0090000; do \ 6 | python -m torch.distributed.launch --nproc_per_node=$NGPUS \ 7 | tools/test_net.py \ 8 | --config-file "configs/e2e_mask_rcnn_R_50_FPN_1x.yaml" \ 9 | OUTPUT_DIR "models/e2e_mask_rcnn_R_50_FPN_1x" \ 10 | MODEL.WEIGHT "models/e2e_mask_rcnn_R_50_FPN_1x/model_${i}.pth" \ 11 | TEST.IMS_PER_BATCH 16; \ 12 | done 13 | 14 | #for i in 0090000 0002500 0005000 0007500 0010000 0012500 0015000 0017500 0020000 0022500 0025000 0027500 0030000 0032500 0035000 0037500 0040000 0042500 0045000 0047500 0050000 0052500 0055000 0057500 0060000 0062500 0065000 0067500 0070000 0072500 0075000 0077500 0080000 0082500 0085000 0087500 0090000; do \ 15 | #python -m torch.distributed.launch --nproc_per_node=$NGPUS \ 16 | # tools/test_net.py \ 17 | # --config-file "configs/dw/e2e_mask_rcnn_R_50_FPN_2x.yaml" \ 18 | # OUTPUT_DIR "models/e2e_mask_rcnn_R_50_FPN_001da_33_re_2x" \ 19 | # MODEL.WEIGHT "models/e2e_mask_rcnn_R_50_FPN_001da_33_re_2x/model_${i}.pth" \ 20 | # TEST.IMS_PER_BATCH 16; \ 21 | # done 22 | # MODEL.WEIGHT "/scratch_new/engs1870/maskrcnn/e2e_mask_rcnn_R_50_FPN_001da_33_re_2x/model_${i}.pth" \ 23 | 24 | 25 | 26 | #for i in 0002500 0005000 0007500 0010000 0012500 0015000 0017500 0020000 0022500 0025000 0027500 0030000 0032500 0035000 0037500 0040000 0042500 0045000 0047500 0050000 0052500 0055000 0057500 0060000 0062500 0065000 0067500 0070000 0072500 0075000 0077500 0080000 0082500 0085000 0087500 0090000; do \ 27 | #python -m torch.distributed.launch --nproc_per_node=$NGPUS \ 28 | # tools/test_net.py \ 29 | # --config-file "configs/e2e_mask_rcnn_R_50_FPN_1x.yaml" \ 30 | # OUTPUT_DIR "models/Non-local" \ 31 | # MODEL.WEIGHT "models/Non-local/model_${i}.pth" \ 32 | # TEST.IMS_PER_BATCH 16; \ 33 | # done -------------------------------------------------------------------------------- /tests/env_tests/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import os 4 | 5 | 6 | def get_config_root_path(): 7 | ''' Path to configs for unit tests ''' 8 | # cur_file_dir is root/tests/env_tests 9 | cur_file_dir = os.path.dirname(os.path.abspath(os.path.realpath(__file__))) 10 | ret = os.path.dirname(os.path.dirname(cur_file_dir)) 11 | ret = os.path.join(ret, "configs") 12 | return ret 13 | -------------------------------------------------------------------------------- /tests/test_backbones.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | import copy 5 | import torch 6 | # import modules to to register backbones 7 | from maskrcnn_benchmark.modeling.backbone import build_backbone # NoQA 8 | from maskrcnn_benchmark.modeling import registry 9 | from maskrcnn_benchmark.config import cfg as g_cfg 10 | from utils import load_config 11 | 12 | 13 | # overwrite configs if specified, otherwise default config is used 14 | BACKBONE_CFGS = { 15 | "R-50-FPN": "e2e_faster_rcnn_R_50_FPN_1x.yaml", 16 | "R-101-FPN": "e2e_faster_rcnn_R_101_FPN_1x.yaml", 17 | "R-152-FPN": "e2e_faster_rcnn_R_101_FPN_1x.yaml", 18 | "R-50-FPN-RETINANET": "retinanet/retinanet_R-50-FPN_1x.yaml", 19 | "R-101-FPN-RETINANET": "retinanet/retinanet_R-101-FPN_1x.yaml", 20 | } 21 | 22 | 23 | class TestBackbones(unittest.TestCase): 24 | def test_build_backbones(self): 25 | ''' Make sure backbones run ''' 26 | 27 | self.assertGreater(len(registry.BACKBONES), 0) 28 | 29 | for name, backbone_builder in registry.BACKBONES.items(): 30 | print('Testing {}...'.format(name)) 31 | if name in BACKBONE_CFGS: 32 | cfg = load_config(BACKBONE_CFGS[name]) 33 | else: 34 | # Use default config if config file is not specified 35 | cfg = copy.deepcopy(g_cfg) 36 | backbone = backbone_builder(cfg) 37 | 38 | # make sures the backbone has `out_channels` 39 | self.assertIsNotNone( 40 | getattr(backbone, 'out_channels', None), 41 | 'Need to provide out_channels for backbone {}'.format(name) 42 | ) 43 | 44 | N, C_in, H, W = 2, 3, 224, 256 45 | input = torch.rand([N, C_in, H, W], dtype=torch.float32) 46 | out = backbone(input) 47 | for cur_out in out: 48 | self.assertEqual( 49 | cur_out.shape[:2], 50 | torch.Size([N, backbone.out_channels]) 51 | ) 52 | 53 | 54 | if __name__ == "__main__": 55 | unittest.main() 56 | -------------------------------------------------------------------------------- /tests/test_configs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | import glob 5 | import os 6 | import utils 7 | 8 | 9 | class TestConfigs(unittest.TestCase): 10 | def test_configs_load(self): 11 | ''' Make sure configs are loadable ''' 12 | 13 | cfg_root_path = utils.get_config_root_path() 14 | files = glob.glob( 15 | os.path.join(cfg_root_path, "./**/*.yaml"), recursive=True) 16 | self.assertGreater(len(files), 0) 17 | 18 | for fn in files: 19 | print('Loading {}...'.format(fn)) 20 | utils.load_config_from_file(fn) 21 | 22 | 23 | if __name__ == "__main__": 24 | unittest.main() 25 | -------------------------------------------------------------------------------- /tests/test_metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import unittest 3 | 4 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 5 | 6 | 7 | class TestMetricLogger(unittest.TestCase): 8 | def test_update(self): 9 | meter = MetricLogger() 10 | for i in range(10): 11 | meter.update(metric=float(i)) 12 | 13 | m = meter.meters["metric"] 14 | self.assertEqual(m.count, 10) 15 | self.assertEqual(m.total, 45) 16 | self.assertEqual(m.median, 4) 17 | self.assertEqual(m.avg, 4.5) 18 | 19 | def test_no_attr(self): 20 | meter = MetricLogger() 21 | _ = meter.meters 22 | _ = meter.delimiter 23 | def broken(): 24 | _ = meter.not_existent 25 | self.assertRaises(AttributeError, broken) 26 | 27 | if __name__ == "__main__": 28 | unittest.main() 29 | -------------------------------------------------------------------------------- /tests/test_rpn_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | import copy 5 | import torch 6 | # import modules to to register rpn heads 7 | from maskrcnn_benchmark.modeling.backbone import build_backbone # NoQA 8 | from maskrcnn_benchmark.modeling.rpn.rpn import build_rpn # NoQA 9 | from maskrcnn_benchmark.modeling import registry 10 | from maskrcnn_benchmark.config import cfg as g_cfg 11 | from utils import load_config 12 | 13 | 14 | # overwrite configs if specified, otherwise default config is used 15 | RPN_CFGS = { 16 | } 17 | 18 | 19 | class TestRPNHeads(unittest.TestCase): 20 | def test_build_rpn_heads(self): 21 | ''' Make sure rpn heads run ''' 22 | 23 | self.assertGreater(len(registry.RPN_HEADS), 0) 24 | 25 | in_channels = 64 26 | num_anchors = 10 27 | 28 | for name, builder in registry.RPN_HEADS.items(): 29 | print('Testing {}...'.format(name)) 30 | if name in RPN_CFGS: 31 | cfg = load_config(RPN_CFGS[name]) 32 | else: 33 | # Use default config if config file is not specified 34 | cfg = copy.deepcopy(g_cfg) 35 | 36 | rpn = builder(cfg, in_channels, num_anchors) 37 | 38 | N, C_in, H, W = 2, in_channels, 24, 32 39 | input = torch.rand([N, C_in, H, W], dtype=torch.float32) 40 | LAYERS = 3 41 | out = rpn([input] * LAYERS) 42 | self.assertEqual(len(out), 2) 43 | logits, bbox_reg = out 44 | for idx in range(LAYERS): 45 | self.assertEqual( 46 | logits[idx].shape, 47 | torch.Size([ 48 | input.shape[0], num_anchors, 49 | input.shape[2], input.shape[3], 50 | ]) 51 | ) 52 | self.assertEqual( 53 | bbox_reg[idx].shape, 54 | torch.Size([ 55 | logits[idx].shape[0], num_anchors * 4, 56 | logits[idx].shape[2], logits[idx].shape[3], 57 | ]), 58 | ) 59 | 60 | 61 | if __name__ == "__main__": 62 | unittest.main() 63 | -------------------------------------------------------------------------------- /tests/test_segmentation_mask.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import unittest 3 | import torch 4 | from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask 5 | 6 | 7 | class TestSegmentationMask(unittest.TestCase): 8 | def __init__(self, method_name='runTest'): 9 | super(TestSegmentationMask, self).__init__(method_name) 10 | poly = [[[423.0, 306.5, 406.5, 277.0, 400.0, 271.5, 389.5, 277.0, 11 | 387.5, 292.0, 384.5, 295.0, 374.5, 220.0, 378.5, 210.0, 12 | 391.0, 200.5, 404.0, 199.5, 414.0, 203.5, 425.5, 221.0, 13 | 438.5, 297.0, 423.0, 306.5], 14 | [100, 100, 200, 100, 200, 200, 100, 200], 15 | ]] 16 | width = 640 17 | height = 480 18 | size = width, height 19 | 20 | self.P = SegmentationMask(poly, size, 'poly') 21 | self.M = SegmentationMask(poly, size, 'poly').convert('mask') 22 | 23 | 24 | def L1(self, A, B): 25 | diff = A.get_mask_tensor() - B.get_mask_tensor() 26 | diff = torch.sum(torch.abs(diff.float())).item() 27 | return diff 28 | 29 | 30 | def test_convert(self): 31 | M_hat = self.M.convert('poly').convert('mask') 32 | P_hat = self.P.convert('mask').convert('poly') 33 | 34 | diff_mask = self.L1(self.M, M_hat) 35 | diff_poly = self.L1(self.P, P_hat) 36 | self.assertTrue(diff_mask == diff_poly) 37 | self.assertTrue(diff_mask <= 8169.) 38 | self.assertTrue(diff_poly <= 8169.) 39 | 40 | 41 | def test_crop(self): 42 | box = [400, 250, 500, 300] # xyxy 43 | diff = self.L1(self.M.crop(box), self.P.crop(box)) 44 | self.assertTrue(diff <= 1.) 45 | 46 | 47 | def test_resize(self): 48 | new_size = 50, 25 49 | M_hat = self.M.resize(new_size) 50 | P_hat = self.P.resize(new_size) 51 | diff = self.L1(M_hat, P_hat) 52 | 53 | self.assertTrue(self.M.size == self.P.size) 54 | self.assertTrue(M_hat.size == P_hat.size) 55 | self.assertTrue(self.M.size != M_hat.size) 56 | self.assertTrue(diff <= 255.) 57 | 58 | 59 | def test_transpose(self): 60 | FLIP_LEFT_RIGHT = 0 61 | FLIP_TOP_BOTTOM = 1 62 | diff_hor = self.L1(self.M.transpose(FLIP_LEFT_RIGHT), 63 | self.P.transpose(FLIP_LEFT_RIGHT)) 64 | 65 | diff_ver = self.L1(self.M.transpose(FLIP_TOP_BOTTOM), 66 | self.P.transpose(FLIP_TOP_BOTTOM)) 67 | 68 | self.assertTrue(diff_hor <= 53250.) 69 | self.assertTrue(diff_ver <= 42494.) 70 | 71 | 72 | if __name__ == "__main__": 73 | 74 | unittest.main() 75 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function, unicode_literals 2 | 3 | # Set up custom environment before nearly anything else is imported 4 | # NOTE: this should be the first import (no not reorder) 5 | from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip 6 | import env_tests.env as env_tests 7 | 8 | import os 9 | import copy 10 | 11 | from maskrcnn_benchmark.config import cfg as g_cfg 12 | 13 | 14 | def get_config_root_path(): 15 | return env_tests.get_config_root_path() 16 | 17 | 18 | def load_config(rel_path): 19 | ''' Load config from file path specified as path relative to config_root ''' 20 | cfg_path = os.path.join(env_tests.get_config_root_path(), rel_path) 21 | return load_config_from_file(cfg_path) 22 | 23 | 24 | def load_config_from_file(file_path): 25 | ''' Load config from file path specified as absolute path ''' 26 | ret = copy.deepcopy(g_cfg) 27 | ret.merge_from_file(file_path) 28 | return ret 29 | -------------------------------------------------------------------------------- /tools/cityscapes/instances2dict_with_polygons.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Convert instances from png files to a dictionary 4 | # This files is created according to https://github.com/facebookresearch/Detectron/issues/111 5 | 6 | from __future__ import print_function, absolute_import, division 7 | import os, sys 8 | 9 | sys.path.append( os.path.normpath( os.path.join( os.path.dirname( __file__ ) , '..' , 'helpers' ) ) ) 10 | from csHelpers import * 11 | 12 | # Cityscapes imports 13 | from cityscapesscripts.evaluation.instance import * 14 | from cityscapesscripts.helpers.csHelpers import * 15 | import cv2 16 | from maskrcnn_benchmark.utils import cv2_util 17 | 18 | 19 | def instances2dict_with_polygons(imageFileList, verbose=False): 20 | imgCount = 0 21 | instanceDict = {} 22 | 23 | if not isinstance(imageFileList, list): 24 | imageFileList = [imageFileList] 25 | 26 | if verbose: 27 | print("Processing {} images...".format(len(imageFileList))) 28 | 29 | for imageFileName in imageFileList: 30 | # Load image 31 | img = Image.open(imageFileName) 32 | 33 | # Image as numpy array 34 | imgNp = np.array(img) 35 | 36 | # Initialize label categories 37 | instances = {} 38 | for label in labels: 39 | instances[label.name] = [] 40 | 41 | # Loop through all instance ids in instance image 42 | for instanceId in np.unique(imgNp): 43 | if instanceId < 1000: 44 | continue 45 | instanceObj = Instance(imgNp, instanceId) 46 | instanceObj_dict = instanceObj.toDict() 47 | 48 | #instances[id2label[instanceObj.labelID].name].append(instanceObj.toDict()) 49 | if id2label[instanceObj.labelID].hasInstances: 50 | mask = (imgNp == instanceId).astype(np.uint8) 51 | contour, hier = cv2_util.findContours( 52 | mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 53 | 54 | polygons = [c.reshape(-1).tolist() for c in contour] 55 | instanceObj_dict['contours'] = polygons 56 | 57 | instances[id2label[instanceObj.labelID].name].append(instanceObj_dict) 58 | 59 | imgKey = os.path.abspath(imageFileName) 60 | instanceDict[imgKey] = instances 61 | imgCount += 1 62 | 63 | if verbose: 64 | print("\rImages Processed: {}".format(imgCount), end=' ') 65 | sys.stdout.flush() 66 | 67 | if verbose: 68 | print("") 69 | 70 | return instanceDict 71 | 72 | def main(argv): 73 | fileList = [] 74 | if (len(argv) > 2): 75 | for arg in argv: 76 | if ("png" in arg): 77 | fileList.append(arg) 78 | instances2dict_with_polygons(fileList, True) 79 | 80 | if __name__ == "__main__": 81 | main(sys.argv[1:]) 82 | --------------------------------------------------------------------------------