├── ABSTRACTIONS.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── INSTALL.md
├── LICENSE
├── MODEL_ZOO.md
├── README.md
├── TROUBLESHOOTING.md
├── configs
    ├── caffe2
    │   ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml
    │   ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml
    │   ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml
    │   ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
    │   ├── e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml
    │   ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml
    │   ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml
    │   ├── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml
    │   └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
    ├── cityscapes
    │   ├── README.md
    │   ├── e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml
    │   └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
    ├── dcn
    │   ├── README.md
    │   ├── e2e_faster_rcnn_dconv_R_50_FPN_1x.yaml
    │   ├── e2e_faster_rcnn_mdconv_R_50_FPN_1x.yaml
    │   ├── e2e_mask_rcnn_dconv_R_50_FPN_1x.yaml
    │   └── e2e_mask_rcnn_mdconv_R_50_FPN_1x.yaml
    ├── dw
    │   ├── e2e_mask_rcnn_R_101_FPN_1x.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_1x.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_2x.yaml
    │   └── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml
    ├── e2e_faster_rcnn_R_101_FPN_1x.yaml
    ├── e2e_faster_rcnn_R_50_C4_1x.yaml
    ├── e2e_faster_rcnn_R_50_FPN_1x.yaml
    ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml
    ├── e2e_faster_rcnn_fbnet.yaml
    ├── e2e_faster_rcnn_fbnet_600.yaml
    ├── e2e_faster_rcnn_fbnet_chamv1a_600.yaml
    ├── e2e_keypoint_rcnn_R_50_FPN_1x.yaml
    ├── e2e_mask_rcnn_R_101_FPN_1x.yaml
    ├── e2e_mask_rcnn_R_50_C4_1x.yaml
    ├── e2e_mask_rcnn_R_50_FPN_1x.yaml
    ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml
    ├── e2e_mask_rcnn_fbnet.yaml
    ├── e2e_mask_rcnn_fbnet_600.yaml
    ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml
    ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml
    ├── gn_baselines
    │   ├── README.md
    │   ├── e2e_faster_rcnn_R_50_FPN_1x_gn.yaml
    │   ├── e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_1x_gn.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
    │   ├── scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml
    │   ├── scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
    │   ├── scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml
    │   └── scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
    ├── pascal_voc
    │   ├── e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml
    │   ├── e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml
    │   └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
    ├── quick_schedules
    │   ├── e2e_faster_rcnn_R_50_C4_quick.yaml
    │   ├── e2e_faster_rcnn_R_50_FPN_quick.yaml
    │   ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml
    │   ├── e2e_keypoint_rcnn_R_50_FPN_quick.yaml
    │   ├── e2e_mask_rcnn_R_50_C4_quick.yaml
    │   ├── e2e_mask_rcnn_R_50_FPN_quick.yaml
    │   ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml
    │   ├── rpn_R_50_C4_quick.yaml
    │   └── rpn_R_50_FPN_quick.yaml
    ├── retinanet
    │   ├── retinanet_R-101-FPN_1x.yaml
    │   ├── retinanet_R-101-FPN_P5_1x.yaml
    │   ├── retinanet_R-50-FPN_1x.yaml
    │   ├── retinanet_R-50-FPN_1x_quick.yaml
    │   ├── retinanet_R-50-FPN_P5_1x.yaml
    │   └── retinanet_X_101_32x8d_FPN_1x.yaml
    ├── rpn_R_101_FPN_1x.yaml
    ├── rpn_R_50_C4_1x.yaml
    ├── rpn_R_50_FPN_1x.yaml
    └── rpn_X_101_32x8d_FPN_1x.yaml
├── demo
    ├── Mask_R-CNN_demo.ipynb
    ├── README.md
    ├── demo_e2e_mask_rcnn_R_50_FPN_1x.png
    ├── demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png
    ├── predictor.py
    ├── test_visualise.py
    ├── visualise.py
    └── webcam.py
├── docker
    ├── Dockerfile
    └── docker-jupyter
    │   ├── Dockerfile
    │   └── jupyter_notebook_config.py
├── maskrcnn_benchmark
    ├── __init__.py
    ├── config
    │   ├── __init__.py
    │   ├── defaults.py
    │   └── paths_catalog.py
    ├── csrc
    │   ├── ROIAlign.h
    │   ├── ROIPool.h
    │   ├── SigmoidFocalLoss.h
    │   ├── cpu
    │   │   ├── ROIAlign_cpu.cpp
    │   │   ├── nms_cpu.cpp
    │   │   └── vision.h
    │   ├── cuda
    │   │   ├── ROIAlign_cuda.cu
    │   │   ├── ROIPool_cuda.cu
    │   │   ├── SigmoidFocalLoss_cuda.cu
    │   │   ├── deform_conv_cuda.cu
    │   │   ├── deform_conv_kernel_cuda.cu
    │   │   ├── deform_pool_cuda.cu
    │   │   ├── deform_pool_kernel_cuda.cu
    │   │   ├── nms.cu
    │   │   └── vision.h
    │   ├── deform_conv.h
    │   ├── deform_pool.h
    │   ├── nms.h
    │   └── vision.cpp
    ├── data
    │   ├── README.md
    │   ├── __init__.py
    │   ├── build.py
    │   ├── collate_batch.py
    │   ├── datasets
    │   │   ├── __init__.py
    │   │   ├── coco.py
    │   │   ├── concat_dataset.py
    │   │   ├── evaluation
    │   │   │   ├── __init__.py
    │   │   │   ├── coco
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── coco_eval.py
    │   │   │   └── voc
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── voc_eval.py
    │   │   ├── list_dataset.py
    │   │   └── voc.py
    │   ├── samplers
    │   │   ├── __init__.py
    │   │   ├── distributed.py
    │   │   ├── grouped_batch_sampler.py
    │   │   └── iteration_based_batch_sampler.py
    │   └── transforms
    │   │   ├── __init__.py
    │   │   ├── build.py
    │   │   └── transforms.py
    ├── engine
    │   ├── __init__.py
    │   ├── inference.py
    │   └── trainer.py
    ├── layers
    │   ├── __init__.py
    │   ├── _utils.py
    │   ├── batch_norm.py
    │   ├── dcn
    │   │   ├── __init__.py
    │   │   ├── deform_conv_func.py
    │   │   ├── deform_conv_module.py
    │   │   ├── deform_pool_func.py
    │   │   ├── deform_pool_module.py
    │   │   ├── deform_unfold_func.py
    │   │   └── deform_unfold_module.py
    │   ├── dualgraph.py
    │   ├── dynamic_weight.py
    │   ├── misc.py
    │   ├── nms.py
    │   ├── roi_align.py
    │   ├── roi_pool.py
    │   ├── sigmoid_focal_loss.py
    │   └── smooth_l1_loss.py
    ├── modeling
    │   ├── __init__.py
    │   ├── backbone
    │   │   ├── __init__.py
    │   │   ├── backbone.py
    │   │   ├── fbnet.py
    │   │   ├── fbnet_builder.py
    │   │   ├── fbnet_modeldef.py
    │   │   ├── fpn.py
    │   │   └── resnet.py
    │   ├── balanced_positive_negative_sampler.py
    │   ├── box_coder.py
    │   ├── detector
    │   │   ├── __init__.py
    │   │   ├── detectors.py
    │   │   └── generalized_rcnn.py
    │   ├── make_layers.py
    │   ├── matcher.py
    │   ├── poolers.py
    │   ├── registry.py
    │   ├── roi_heads
    │   │   ├── __init__.py
    │   │   ├── box_head
    │   │   │   ├── __init__.py
    │   │   │   ├── box_head.py
    │   │   │   ├── inference.py
    │   │   │   ├── loss.py
    │   │   │   ├── roi_box_feature_extractors.py
    │   │   │   └── roi_box_predictors.py
    │   │   ├── keypoint_head
    │   │   │   ├── __init__.py
    │   │   │   ├── inference.py
    │   │   │   ├── keypoint_head.py
    │   │   │   ├── loss.py
    │   │   │   ├── roi_keypoint_feature_extractors.py
    │   │   │   └── roi_keypoint_predictors.py
    │   │   ├── mask_head
    │   │   │   ├── __init__.py
    │   │   │   ├── inference.py
    │   │   │   ├── loss.py
    │   │   │   ├── mask_head.py
    │   │   │   ├── roi_mask_feature_extractors.py
    │   │   │   └── roi_mask_predictors.py
    │   │   └── roi_heads.py
    │   ├── rpn
    │   │   ├── __init__.py
    │   │   ├── anchor_generator.py
    │   │   ├── inference.py
    │   │   ├── loss.py
    │   │   ├── retinanet
    │   │   │   ├── __init__.py
    │   │   │   ├── inference.py
    │   │   │   ├── loss.py
    │   │   │   └── retinanet.py
    │   │   ├── rpn.py
    │   │   └── utils.py
    │   └── utils.py
    ├── solver
    │   ├── __init__.py
    │   ├── build.py
    │   └── lr_scheduler.py
    ├── structures
    │   ├── __init__.py
    │   ├── bounding_box.py
    │   ├── boxlist_ops.py
    │   ├── image_list.py
    │   ├── keypoint.py
    │   └── segmentation_mask.py
    └── utils
    │   ├── README.md
    │   ├── __init__.py
    │   ├── c2_model_loading.py
    │   ├── checkpoint.py
    │   ├── collect_env.py
    │   ├── comm.py
    │   ├── compute_flop.py
    │   ├── cv2_util.py
    │   ├── env.py
    │   ├── imports.py
    │   ├── logger.py
    │   ├── metric_logger.py
    │   ├── miscellaneous.py
    │   ├── model_serialization.py
    │   ├── model_zoo.py
    │   ├── registry.py
    │   └── timer.py
├── requirements.txt
├── run.sh
├── setup.py
├── test.sh
├── tests
    ├── checkpoint.py
    ├── env_tests
    │   └── env.py
    ├── test_backbones.py
    ├── test_box_coder.py
    ├── test_configs.py
    ├── test_data_samplers.py
    ├── test_detectors.py
    ├── test_fbnet.py
    ├── test_feature_extractors.py
    ├── test_metric_logger.py
    ├── test_nms.py
    ├── test_predictors.py
    ├── test_rpn_heads.py
    ├── test_segmentation_mask.py
    └── utils.py
├── tools
    ├── cityscapes
    │   ├── convert_cityscapes_to_coco.py
    │   └── instances2dict_with_polygons.py
    ├── test_net.py
    └── train_net.py
└── visualise
    └── offset.m


/ABSTRACTIONS.md:
--------------------------------------------------------------------------------
 1 | ## Abstractions
 2 | The main abstractions introduced by `maskrcnn_benchmark` that are useful to
 3 | have in mind are the following:
 4 | 
 5 | ### ImageList
 6 | In PyTorch, the first dimension of the input to the network generally represents
 7 | the batch dimension, and thus all elements of the same batch have the same
 8 | height / width.
 9 | In order to support images with different sizes and aspect ratios in the same
10 | batch, we created the `ImageList` class, which holds internally a batch of
11 | images (of possibly different sizes). The images are padded with zeros such that
12 | they have the same final size and batched over the first dimension. The original
13 | sizes of the images before padding are stored in the `image_sizes` attribute,
14 | and the batched tensor in `tensors`.
15 | We provide a convenience function `to_image_list` that accepts a few different
16 | input types, including a list of tensors, and returns an `ImageList` object.
17 | 
18 | ```python
19 | from maskrcnn_benchmark.structures.image_list import to_image_list
20 | 
21 | images = [torch.rand(3, 100, 200), torch.rand(3, 150, 170)]
22 | batched_images = to_image_list(images)
23 | 
24 | # it is also possible to make the final batched image be a multiple of a number
25 | batched_images_32 = to_image_list(images, size_divisible=32)
26 | ```
27 | 
28 | ### BoxList
29 | The `BoxList` class holds a set of bounding boxes (represented as a `Nx4` tensor) for
30 | a specific image, as well as the size of the image as a `(width, height)` tuple.
31 | It also contains a set of methods that allow performing geometric
32 | transformations on the bounding boxes (such as cropping, scaling and flipping).
33 | The class accepts bounding boxes from two different input formats:
34 | - `xyxy`, where each box is encoded as `x1`, `y1`, `x2` and `y2` coordinates, and
35 | - `xywh`, where each box is encoded as `x1`, `y1`, `w` and `h`.
36 | 
37 | Additionally, each `BoxList` instance can also hold arbitrary additional information
38 | for each bounding box, such as labels, visibility, probability scores etc.
39 | 
40 | Here is an example of how to create a `BoxList` from a list of coordinates:
41 | ```python
42 | from maskrcnn_benchmark.structures.bounding_box import BoxList, FLIP_LEFT_RIGHT
43 | 
44 | width = 100
45 | height = 200
46 | boxes = [
47 |   [0, 10, 50, 50],
48 |   [50, 20, 90, 60],
49 |   [10, 10, 50, 50]
50 | ]
51 | # create a BoxList with 3 boxes
52 | bbox = BoxList(boxes, image_size=(width, height), mode='xyxy')
53 | 
54 | # perform some box transformations, has similar API as PIL.Image
55 | bbox_scaled = bbox.resize((width * 2, height * 3))
56 | bbox_flipped = bbox.transpose(FLIP_LEFT_RIGHT)
57 | 
58 | # add labels for each bbox
59 | labels = torch.tensor([0, 10, 1])
60 | bbox.add_field('labels', labels)
61 | 
62 | # bbox also supports a few operations, like indexing
63 | # here, selects boxes 0 and 2
64 | bbox_subset = bbox[[0, 2]]
65 | ```
66 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 | 
3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
4 | Please read the [full text](https://code.fb.com/codeofconduct/)
5 | so that you can understand what actions will and will not be tolerated.
6 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to Mask-RCNN Benchmark
 2 | We want to make contributing to this project as easy and transparent as
 3 | possible.
 4 | 
 5 | ## Our Development Process
 6 | Minor changes and improvements will be released on an ongoing basis. Larger changes (e.g., changesets implementing a new paper) will be released on a more periodic basis.
 7 | 
 8 | ## Pull Requests
 9 | We actively welcome your pull requests.
10 | 
11 | 1. Fork the repo and create your branch from `master`.
12 | 2. If you've added code that should be tested, add tests.
13 | 3. If you've changed APIs, update the documentation.
14 | 4. Ensure the test suite passes.
15 | 5. Make sure your code lints.
16 | 6. If you haven't already, complete the Contributor License Agreement ("CLA").
17 | 
18 | ## Contributor License Agreement ("CLA")
19 | In order to accept your pull request, we need you to submit a CLA. You only need
20 | to do this once to work on any of Facebook's open source projects.
21 | 
22 | Complete your CLA here: <https://code.facebook.com/cla>
23 | 
24 | ## Issues
25 | We use GitHub issues to track public bugs. Please ensure your description is
26 | clear and has sufficient instructions to be able to reproduce the issue.
27 | 
28 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
29 | disclosure of security bugs. In those cases, please go through the process
30 | outlined on that page and do not file a public issue.
31 | 
32 | ## Coding Style  
33 | * 4 spaces for indentation rather than tabs
34 | * 80 character line length
35 | * PEP8 formatting following [Black](https://black.readthedocs.io/en/stable/)
36 | 
37 | ## License
38 | By contributing to Mask-RCNN Benchmark, you agree that your contributions will be licensed
39 | under the LICENSE file in the root directory of this source tree.
40 | 


--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
 1 | ## Installation
 2 | 
 3 | ### Requirements:
 4 | - PyTorch 1.0 from a nightly release. It **will not** work with the stable 1.0 or 1.0.1 releases. Installation instructions can be found at https://pytorch.org/get-started/locally/
 5 | - torchvision from master
 6 | - cocoapi
 7 | - yacs
 8 | - matplotlib
 9 | - GCC >= 4.9
10 | - OpenCV
11 | 
12 | 
13 | ### Option 1: Step-by-step installation
14 | 
15 | ```bash
16 | # first, make sure that your conda is set up properly with the right environment
17 | # for that, check that `which conda`, `which pip` and `which python` point to the
18 | # right path. From a clean conda env, this is what you need to do
19 | 
20 | conda create --name maskrcnn_benchmark
21 | conda activate maskrcnn_benchmark
22 | 
23 | # this installs the right pip and dependencies for the fresh python
24 | conda install ipython
25 | 
26 | # maskrcnn_benchmark and coco api dependencies
27 | pip install ninja yacs cython matplotlib tqdm opencv-python
28 | 
29 | # follow PyTorch installation in https://pytorch.org/get-started/locally/
30 | # we give the instructions for CUDA 9.0
31 | conda install -c pytorch pytorch-nightly torchvision cudatoolkit=9.0
32 | 
33 | export INSTALL_DIR=$PWD
34 | 
35 | # install pycocotools
36 | cd $INSTALL_DIR
37 | git clone https://github.com/cocodataset/cocoapi.git
38 | cd cocoapi/PythonAPI
39 | python setup.py build_ext install
40 | 
41 | # install apex
42 | cd $INSTALL_DIR
43 | git clone https://github.com/NVIDIA/apex.git
44 | cd apex
45 | python setup.py install --cuda_ext --cpp_ext
46 | 
47 | # install PyTorch Detection
48 | cd $INSTALL_DIR
49 | git clone https://github.com/facebookresearch/maskrcnn-benchmark.git
50 | cd maskrcnn-benchmark
51 | 
52 | # the following will install the lib with
53 | # symbolic links, so that you can modify
54 | # the files if you want and won't need to
55 | # re-build it
56 | python setup.py build develop
57 | 
58 | 
59 | unset INSTALL_DIR
60 | 
61 | # or if you are on macOS
62 | # MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop
63 | ```
64 | 
65 | ### Option 2: Docker Image (Requires CUDA, Linux only)
66 | 
67 | Build image with defaults (`CUDA=9.0`, `CUDNN=7`, `FORCE_CUDA=1`):
68 | 
69 |     nvidia-docker build -t maskrcnn-benchmark docker/
70 |     
71 | Build image with other CUDA and CUDNN versions:
72 | 
73 |     nvidia-docker build -t maskrcnn-benchmark --build-arg CUDA=9.2 --build-arg CUDNN=7 docker/
74 |     
75 | Build image with FORCE_CUDA disabled:
76 | 
77 |     nvidia-docker build -t maskrcnn-benchmark --build-arg FORCE_CUDA=0 docker/
78 |     
79 | Build and run image with built-in Jupyter notebook (note that the password is used to log in to the Jupyter notebook):
80 | 
81 |     nvidia-docker build -t maskrcnn-benchmark-jupyter docker/docker-jupyter/
82 |     nvidia-docker run -td -p 8888:8888 -e PASSWORD=<password> -v <host-dir>:<container-dir> maskrcnn-benchmark-jupyter
83 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Facebook
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 |   TEST: ("coco_2014_minival",)
25 | DATALOADER:
26 |   SIZE_DIVISIBILITY: 32
27 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x"
4 | DATASETS:
5 |   TEST: ("coco_2014_minival",)
6 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 |   TEST: ("coco_2014_minival",)
25 | DATALOADER:
26 |   SIZE_DIVISIBILITY: 32
27 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |   ROI_BOX_HEAD:
21 |     POOLER_RESOLUTION: 7
22 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 |     POOLER_SAMPLING_RATIO: 2
24 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 |     PREDICTOR: "FPNPredictor"
26 | DATASETS:
27 |   TEST: ("coco_2014_minival",)
28 | DATALOADER:
29 |   SIZE_DIVISIBILITY: 32
30 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/37697547/e2e_keypoint_rcnn_R-50-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 2
24 |   ROI_KEYPOINT_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
27 |     PREDICTOR: "KeypointRCNNPredictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 56
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   KEYPOINT_ON: True
33 | DATASETS:
34 |   TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",)
35 |   TEST: ("keypoints_coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
38 | DATALOADER:
39 |   SIZE_DIVISIBILITY: 32
40 | SOLVER:
41 |   BASE_LR: 0.02
42 |   WEIGHT_DECAY: 0.0001
43 |   STEPS: (60000, 80000)
44 |   MAX_ITER: 90000
45 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |   ROI_MASK_HEAD:
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 |     PREDICTOR: "MaskRCNNC4Predictor"
27 |     POOLER_RESOLUTION: 14
28 |     POOLER_SAMPLING_RATIO: 2
29 |     RESOLUTION: 28
30 |     SHARE_BOX_FEATURE_EXTRACTOR: False
31 |   MASK_ON: True
32 | DATASETS:
33 |   TEST: ("coco_2014_minival",)
34 | DATALOADER:
35 |   SIZE_DIVISIBILITY: 32
36 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x"
 4 |   ROI_MASK_HEAD:
 5 |     PREDICTOR: "MaskRCNNC4Predictor"
 6 |     SHARE_BOX_FEATURE_EXTRACTOR: True
 7 |   MASK_ON: True
 8 | DATASETS:
 9 |   TEST: ("coco_2014_minival",)
10 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |   ROI_MASK_HEAD:
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 |     PREDICTOR: "MaskRCNNC4Predictor"
27 |     POOLER_RESOLUTION: 14
28 |     POOLER_SAMPLING_RATIO: 2
29 |     RESOLUTION: 28
30 |     SHARE_BOX_FEATURE_EXTRACTOR: False
31 |   MASK_ON: True
32 | DATASETS:
33 |   TEST: ("coco_2014_minival",)
34 | DATALOADER:
35 |   SIZE_DIVISIBILITY: 32
36 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/37129812/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-152-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |   ROI_BOX_HEAD:
21 |     POOLER_RESOLUTION: 7
22 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 |     POOLER_SAMPLING_RATIO: 2
24 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 |     PREDICTOR: "FPNPredictor"
26 |   ROI_MASK_HEAD:
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
29 |     PREDICTOR: "MaskRCNNC4Predictor"
30 |     POOLER_RESOLUTION: 14
31 |     POOLER_SAMPLING_RATIO: 2
32 |     RESOLUTION: 28
33 |     SHARE_BOX_FEATURE_EXTRACTOR: False
34 |   MASK_ON: True
35 | DATASETS:
36 |   TEST: ("coco_2014_minival",)
37 | DATALOADER:
38 |   SIZE_DIVISIBILITY: 32
39 | 


--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |   ROI_BOX_HEAD:
21 |     POOLER_RESOLUTION: 7
22 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 |     POOLER_SAMPLING_RATIO: 2
24 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 |     PREDICTOR: "FPNPredictor"
26 |   ROI_MASK_HEAD:
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
29 |     PREDICTOR: "MaskRCNNC4Predictor"
30 |     POOLER_RESOLUTION: 14
31 |     POOLER_SAMPLING_RATIO: 2
32 |     RESOLUTION: 28
33 |     SHARE_BOX_FEATURE_EXTRACTOR: False
34 |   MASK_ON: True
35 | DATASETS:
36 |   TEST: ("coco_2014_minival",)
37 | DATALOADER:
38 |   SIZE_DIVISIBILITY: 32
39 | 


--------------------------------------------------------------------------------
/configs/cityscapes/e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 9
24 | DATASETS:
25 |   TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",)
26 |   TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",)
27 | DATALOADER:
28 |   SIZE_DIVISIBILITY: 32
29 | SOLVER:
30 |   BASE_LR: 0.01
31 |   WEIGHT_DECAY: 0.0001
32 |   STEPS: (18000,)
33 |   MAX_ITER: 24000
34 | 


--------------------------------------------------------------------------------
/configs/cityscapes/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 9
24 |   ROI_MASK_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
27 |     PREDICTOR: "MaskRCNNC4Predictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 28
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   MASK_ON: True
33 | DATASETS:
34 |   TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",)
35 |   TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",)
36 | DATALOADER:
37 |   SIZE_DIVISIBILITY: 32
38 | SOLVER:
39 |   BASE_LR: 0.01
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (18000,)
42 |   MAX_ITER: 24000
43 | 


--------------------------------------------------------------------------------
/configs/dcn/README.md:
--------------------------------------------------------------------------------
 1 | ### Reference 
 2 | 1 [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/pdf/1811.11168.pdf)  
 3 | 2 third-party: [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn)  
 4 | 
 5 | ### Performance
 6 | |      case                   | bbox AP | mask AP |
 7 | |----------------------------:|--------:|:-------:|
 8 | | R-50-FPN-dcn (implement)    |  39.8   |  -      |
 9 | | R-50-FPN-dcn (mmdetection)  |  40.0   |  -      |
10 | | R-50-FPN-mdcn (implement)   |  40.0   |  -      |
11 | | R-50-FPN-mdcn (mmdetection) |  40.3   |  -      |
12 | | R-50-FPN-dcn (implement)    |  40.8   |  36.8   |
13 | | R-50-FPN-dcn (mmdetection)  |  41.1   |  37.2   |
14 | | R-50-FPN-mdcn (implement)   |  40.7   |  36.7   |
15 | | R-50-FPN-mdcn (mmdetection) |  41.4   |  37.4   |
16 | 
17 | 
18 | ### Note
19 | See the [dcn-v2](https://github.com/open-mmlab/mmdetection/blob/master/MODEL_ZOO.md#deformable-convolution-v2) section of the `mmdetection` model zoo for more details.  
20 | 
21 | 
22 | ### Usage
23 | Add these three options under `MODEL.RESNETS` in your config file:
24 | ```
25 | MODEL:
26 |   RESNETS:
27 |     # corresponding to C2,C3,C4,C5
28 |     STAGE_WITH_DCN: (False, True, True, True)
29 |     WITH_MODULATED_DCN: True
30 |     DEFORMABLE_GROUPS: 1
31 | ```


--------------------------------------------------------------------------------
/configs/dcn/e2e_faster_rcnn_dconv_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS:
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STAGE_WITH_DCN: (False, True, True, True)
14 |     WITH_MODULATED_DCN: False
15 |     DEFORMABLE_GROUPS: 1
16 |   RPN:
17 |     USE_FPN: True
18 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
19 |     PRE_NMS_TOP_N_TRAIN: 2000
20 |     PRE_NMS_TOP_N_TEST: 1000
21 |     POST_NMS_TOP_N_TEST: 1000
22 |     FPN_POST_NMS_TOP_N_TEST: 1000
23 |   ROI_HEADS:
24 |     USE_FPN: True
25 |   ROI_BOX_HEAD:
26 |     POOLER_RESOLUTION: 7
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     POOLER_SAMPLING_RATIO: 2
29 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
30 |     PREDICTOR: "FPNPredictor"
31 | DATASETS:
32 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
33 |   TEST: ("coco_2014_minival",)
34 | DATALOADER:
35 |   SIZE_DIVISIBILITY: 32
36 | SOLVER:
37 |   # Assume 8 gpus
38 |   BASE_LR: 0.02
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (60000, 80000)
41 |   MAX_ITER: 90000
42 |   IMS_PER_BATCH: 16
43 | TEST:
44 |   IMS_PER_BATCH: 8
45 | 


--------------------------------------------------------------------------------
/configs/dcn/e2e_faster_rcnn_mdconv_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS:
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STAGE_WITH_DCN: (False, True, True, True)
14 |     WITH_MODULATED_DCN: True
15 |     DEFORMABLE_GROUPS: 1
16 |   RPN:
17 |     USE_FPN: True
18 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
19 |     PRE_NMS_TOP_N_TRAIN: 2000
20 |     PRE_NMS_TOP_N_TEST: 1000
21 |     POST_NMS_TOP_N_TEST: 1000
22 |     FPN_POST_NMS_TOP_N_TEST: 1000
23 |   ROI_HEADS:
24 |     USE_FPN: True
25 |   ROI_BOX_HEAD:
26 |     POOLER_RESOLUTION: 7
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     POOLER_SAMPLING_RATIO: 2
29 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
30 |     PREDICTOR: "FPNPredictor"
31 | DATASETS:
32 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
33 |   TEST: ("coco_2014_minival",)
34 | DATALOADER:
35 |   SIZE_DIVISIBILITY: 32
36 | SOLVER:
37 |   # Assume 8 gpus
38 |   BASE_LR: 0.02
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (60000, 80000)
41 |   MAX_ITER: 90000
42 |   IMS_PER_BATCH: 16
43 | TEST:
44 |   IMS_PER_BATCH: 8
45 | 


--------------------------------------------------------------------------------
/configs/dcn/e2e_mask_rcnn_dconv_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS:
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STAGE_WITH_DCN: (False, True, True, True)
14 |     WITH_MODULATED_DCN: False
15 |     DEFORMABLE_GROUPS: 1
16 |   RPN:
17 |     USE_FPN: True
18 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
19 |     PRE_NMS_TOP_N_TRAIN: 2000
20 |     PRE_NMS_TOP_N_TEST: 1000
21 |     POST_NMS_TOP_N_TEST: 1000
22 |     FPN_POST_NMS_TOP_N_TEST: 1000
23 |   ROI_HEADS:
24 |     USE_FPN: True
25 |   ROI_BOX_HEAD:
26 |     POOLER_RESOLUTION: 7
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     POOLER_SAMPLING_RATIO: 2
29 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
30 |     PREDICTOR: "FPNPredictor"
31 |   ROI_MASK_HEAD:
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
34 |     PREDICTOR: "MaskRCNNC4Predictor"
35 |     POOLER_RESOLUTION: 14
36 |     POOLER_SAMPLING_RATIO: 2
37 |     RESOLUTION: 28
38 |     SHARE_BOX_FEATURE_EXTRACTOR: False
39 |   MASK_ON: True
40 | DATASETS:
41 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
42 |   TEST: ("coco_2014_minival",)
43 | DATALOADER:
44 |   SIZE_DIVISIBILITY: 32
45 | SOLVER:
46 |   # Assume 8 gpus
47 |   BASE_LR: 0.02
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (60000, 80000)
50 |   MAX_ITER: 90000
51 |   IMS_PER_BATCH: 16
52 | TEST:
53 |   IMS_PER_BATCH: 8
54 | 
55 | 


--------------------------------------------------------------------------------
/configs/dcn/e2e_mask_rcnn_mdconv_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS:
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STAGE_WITH_DCN: (False, True, True, True)
14 |     WITH_MODULATED_DCN: True
15 |     DEFORMABLE_GROUPS: 1
16 |   RPN:
17 |     USE_FPN: True
18 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
19 |     PRE_NMS_TOP_N_TRAIN: 2000
20 |     PRE_NMS_TOP_N_TEST: 1000
21 |     POST_NMS_TOP_N_TEST: 1000
22 |     FPN_POST_NMS_TOP_N_TEST: 1000
23 |   ROI_HEADS:
24 |     USE_FPN: True
25 |   ROI_BOX_HEAD:
26 |     POOLER_RESOLUTION: 7
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     POOLER_SAMPLING_RATIO: 2
29 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
30 |     PREDICTOR: "FPNPredictor"
31 |   ROI_MASK_HEAD:
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
34 |     PREDICTOR: "MaskRCNNC4Predictor"
35 |     POOLER_RESOLUTION: 14
36 |     POOLER_SAMPLING_RATIO: 2
37 |     RESOLUTION: 28
38 |     SHARE_BOX_FEATURE_EXTRACTOR: False
39 |   MASK_ON: True
40 | DATASETS:
41 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
42 |   TEST: ("coco_2014_minival",)
43 | DATALOADER:
44 |   SIZE_DIVISIBILITY: 32
45 | SOLVER:
46 |   # Assume 8 gpus
47 |   BASE_LR: 0.02
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (60000, 80000)
50 |   MAX_ITER: 90000
51 |   IMS_PER_BATCH: 16
52 | TEST:
53 |   IMS_PER_BATCH: 8
54 | 


--------------------------------------------------------------------------------
/configs/dw/e2e_mask_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STAGE_WITH_DW: (False, True, True, True)
 9 |     DYNAMIC_WEIGHT:
10 |       GROUP: 4
11 |       KERNEL: 3
12 |       DILATION: ((1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12))
13 |       SHUFFLE: False
14 |       DEFORM: "deformatt"
15 |       INSERT_POS: "after3x3"
16 |       DOMAIN: "block"
17 |   RPN:
18 |     USE_FPN: True
19 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
20 |     PRE_NMS_TOP_N_TRAIN: 2000
21 |     PRE_NMS_TOP_N_TEST: 1000
22 |     POST_NMS_TOP_N_TEST: 1000
23 |     FPN_POST_NMS_TOP_N_TEST: 1000
24 |   ROI_HEADS:
25 |     USE_FPN: True
26 |   ROI_BOX_HEAD:
27 |     POOLER_RESOLUTION: 7
28 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
29 |     POOLER_SAMPLING_RATIO: 2
30 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
31 |     PREDICTOR: "FPNPredictor"
32 |   ROI_MASK_HEAD:
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
35 |     PREDICTOR: "MaskRCNNC4Predictor"
36 |     POOLER_RESOLUTION: 14
37 |     POOLER_SAMPLING_RATIO: 2
38 |     RESOLUTION: 28
39 |     SHARE_BOX_FEATURE_EXTRACTOR: False
40 |   MASK_ON: True
41 | DATASETS:
42 |   TRAIN: ("coco_2017_train",)
43 |   TEST: ("coco_2017_val",)
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 |   BASE_LR: 0.02
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (60000, 80000)
50 |   MAX_ITER: 90000
51 | OUTPUT_DIR: "/scratch_new/engs1870/maskrcnn/e2e_mask_rcnn_R_101_FPN_111da_33_re_1x"
52 | 


--------------------------------------------------------------------------------
/configs/dw/e2e_mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STAGE_WITH_DW: (False, False, False, True)
 9 |     DYNAMIC_WEIGHT: 
10 |       GROUP: 4
11 |       KERNEL: 3
12 |       DILATION: ((1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12))
13 |       SHUFFLE: False
14 |       DEFORM: "deformatt"
15 |       INSERT_POS: "after3x3"
16 |       DOMAIN: "block"
17 |   RPN:
18 |     USE_FPN: True
19 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
20 |     PRE_NMS_TOP_N_TRAIN: 2000
21 |     PRE_NMS_TOP_N_TEST: 1000
22 |     POST_NMS_TOP_N_TEST: 1000
23 |     FPN_POST_NMS_TOP_N_TEST: 1000
24 |   ROI_HEADS:
25 |     USE_FPN: True
26 |   ROI_BOX_HEAD:
27 |     POOLER_RESOLUTION: 7
28 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
29 |     POOLER_SAMPLING_RATIO: 2
30 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
31 |     PREDICTOR: "FPNPredictor"
32 |   ROI_MASK_HEAD:
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
35 |     PREDICTOR: "MaskRCNNC4Predictor"
36 |     POOLER_RESOLUTION: 14
37 |     POOLER_SAMPLING_RATIO: 2
38 |     RESOLUTION: 28
39 |     SHARE_BOX_FEATURE_EXTRACTOR: False
40 |   MASK_ON: True
41 | DATASETS:
42 |   TRAIN: ("coco_2017_train",)
43 |   TEST: ("coco_2017_val",)
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 |   BASE_LR: 0.02
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (60000, 80000)
50 |   MAX_ITER: 90000
51 | #OUTPUT_DIR: "/scratch_new/engs1870/maskrcnn/e2e_mask_rcnn_R_50_FPN_001_deDGMN_1x"
52 | 


--------------------------------------------------------------------------------
/configs/dw/e2e_mask_rcnn_R_50_FPN_2x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STAGE_WITH_DW: (False, True, True, True)
 9 |     DYNAMIC_WEIGHT:
10 |       GROUP: 4
11 |       KERNEL: 3
12 |       DILATION: ((1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12))
13 |       SHUFFLE: False
14 |       DEFORM: "deformatt"
15 |       INSERT_POS: "after3x3"
16 |       DOMAIN: "block"
17 |   RPN:
18 |     USE_FPN: True
19 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
20 |     PRE_NMS_TOP_N_TRAIN: 2000
21 |     PRE_NMS_TOP_N_TEST: 1000
22 |     POST_NMS_TOP_N_TEST: 1000
23 |     FPN_POST_NMS_TOP_N_TEST: 1000
24 |   ROI_HEADS:
25 |     USE_FPN: True
26 |   ROI_BOX_HEAD:
27 |     POOLER_RESOLUTION: 7
28 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
29 |     POOLER_SAMPLING_RATIO: 2
30 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
31 |     PREDICTOR: "FPNPredictor"
32 |   ROI_MASK_HEAD:
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
35 |     PREDICTOR: "MaskRCNNC4Predictor"
36 |     POOLER_RESOLUTION: 14
37 |     POOLER_SAMPLING_RATIO: 2
38 |     RESOLUTION: 28
39 |     SHARE_BOX_FEATURE_EXTRACTOR: False
40 |   MASK_ON: True
41 | DATASETS:
42 |   TRAIN: ("coco_2017_train",)
43 |   TEST: ("coco_2017_val",)
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 |   BASE_LR: 0.02
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (120000, 160000)
50 |   MAX_ITER: 180000
51 | OUTPUT_DIR: "/scratch_new/engs1870/maskrcnn/e2e_mask_rcnn_R_50_FPN_111da_33_re_2x"


--------------------------------------------------------------------------------
/configs/dw/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |     STAGE_WITH_DW: (False, True, True, True)
12 |     DYNAMIC_WEIGHT:
13 |       GROUP: 4
14 |       KERNEL: 3
15 |       DILATION: ((1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12), (1, 4, 8, 12))
16 |       SHUFFLE: False
17 |       DEFORM: "deformatt"
18 |       INSERT_POS: "after3x3"
19 |       DOMAIN: "block"
20 |   RPN:
21 |     USE_FPN: True
22 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
23 |     PRE_NMS_TOP_N_TRAIN: 2000
24 |     PRE_NMS_TOP_N_TEST: 1000
25 |     POST_NMS_TOP_N_TEST: 1000
26 |     FPN_POST_NMS_TOP_N_TEST: 1000
27 |   ROI_HEADS:
28 |     USE_FPN: True
29 |   ROI_BOX_HEAD:
30 |     POOLER_RESOLUTION: 7
31 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
32 |     POOLER_SAMPLING_RATIO: 2
33 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
34 |     PREDICTOR: "FPNPredictor"
35 |   ROI_MASK_HEAD:
36 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
37 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
38 |     PREDICTOR: "MaskRCNNC4Predictor"
39 |     POOLER_RESOLUTION: 14
40 |     POOLER_SAMPLING_RATIO: 2
41 |     RESOLUTION: 28
42 |     SHARE_BOX_FEATURE_EXTRACTOR: False
43 |   MASK_ON: True
44 | DATASETS:
45 |   TRAIN: ("coco_2017_train",)
46 |   TEST: ("coco_2017_test",)
47 | DATALOADER:
48 |   SIZE_DIVISIBILITY: 32
49 | SOLVER:
50 |   BASE_LR: 0.01
51 |   WEIGHT_DECAY: 0.0001
52 |   STEPS: (120000, 160000)
53 |   MAX_ITER: 180000
54 |   IMS_PER_BATCH: 8
55 | OUTPUT_DIR: "/scratch_new/engs1870/maskrcnn/e2e_mask_rcnn_X_101_32x8d_FPN_111da_33_re_1x"
56 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
25 |   TEST: ("coco_2014_minival",)
26 | DATALOADER:
27 |   SIZE_DIVISIBILITY: 32
28 | SOLVER:
29 |   BASE_LR: 0.02
30 |   WEIGHT_DECAY: 0.0001
31 |   STEPS: (60000, 80000)
32 |   MAX_ITER: 90000
33 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 | DATASETS:
 8 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
 9 |   TEST: ("coco_2014_minival",)
10 | SOLVER:
11 |   BASE_LR: 0.01
12 |   WEIGHT_DECAY: 0.0001
13 |   STEPS: (120000, 160000)
14 |   MAX_ITER: 180000
15 |   IMS_PER_BATCH: 8
16 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
25 |   TEST: ("coco_2014_minival",)
26 | DATALOADER:
27 |   SIZE_DIVISIBILITY: 32
28 | SOLVER:
29 |   BASE_LR: 0.02
30 |   WEIGHT_DECAY: 0.0001
31 |   STEPS: (60000, 80000)
32 |   MAX_ITER: 90000
33 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RPN:
 7 |     USE_FPN: True
 8 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
 9 |     PRE_NMS_TOP_N_TRAIN: 2000
10 |     PRE_NMS_TOP_N_TEST: 1000
11 |     POST_NMS_TOP_N_TEST: 1000
12 |     FPN_POST_NMS_TOP_N_TEST: 1000
13 |   ROI_HEADS:
14 |     USE_FPN: True
15 |   ROI_BOX_HEAD:
16 |     POOLER_RESOLUTION: 7
17 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
18 |     POOLER_SAMPLING_RATIO: 2
19 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
20 |     PREDICTOR: "FPNPredictor"
21 |   RESNETS:
22 |     BACKBONE_OUT_CHANNELS: 256
23 |     STRIDE_IN_1X1: False
24 |     NUM_GROUPS: 32
25 |     WIDTH_PER_GROUP: 8
26 | DATASETS:
27 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 |   TEST: ("coco_2014_minival",)
29 | DATALOADER:
30 |   SIZE_DIVISIBILITY: 32
31 | SOLVER:
32 |   BASE_LR: 0.01
33 |   WEIGHT_DECAY: 0.0001
34 |   STEPS: (120000, 160000)
35 |   MAX_ITER: 180000
36 |   IMS_PER_BATCH: 8
37 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_fbnet.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "default"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |   RPN:
12 |     ANCHOR_SIZES: (16, 32, 64, 128, 256)
13 |     ANCHOR_STRIDE: (16, )
14 |     BATCH_SIZE_PER_IMAGE: 256
15 |     PRE_NMS_TOP_N_TRAIN: 6000
16 |     PRE_NMS_TOP_N_TEST: 6000
17 |     POST_NMS_TOP_N_TRAIN: 2000
18 |     POST_NMS_TOP_N_TEST: 100
19 |     RPN_HEAD: FBNet.rpn_head
20 |   ROI_HEADS:
21 |     BATCH_SIZE_PER_IMAGE: 512
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 6
24 |     FEATURE_EXTRACTOR: FBNet.roi_head
25 |     NUM_CLASSES: 81
26 | DATASETS:
27 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 |   TEST: ("coco_2014_minival",)
29 | SOLVER:
30 |   BASE_LR: 0.06
31 |   WARMUP_FACTOR: 0.1
32 |   WEIGHT_DECAY: 0.0001
33 |   STEPS: (60000, 80000)
34 |   MAX_ITER: 90000
35 |   IMS_PER_BATCH: 128  # for 8GPUs
36 | # TEST:
37 | #   IMS_PER_BATCH: 8
38 | INPUT:
39 |   MIN_SIZE_TRAIN: (320, )
40 |   MAX_SIZE_TRAIN: 640
41 |   MIN_SIZE_TEST: 320
42 |   MAX_SIZE_TEST: 640
43 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
44 |   PIXEL_STD: [57.375, 57.12, 58.395]
45 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_fbnet_600.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "default"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |   RPN:
12 |     ANCHOR_SIZES: (32, 64, 128, 256, 512)
13 |     ANCHOR_STRIDE: (16, )
14 |     BATCH_SIZE_PER_IMAGE: 256
15 |     PRE_NMS_TOP_N_TRAIN: 6000
16 |     PRE_NMS_TOP_N_TEST: 6000
17 |     POST_NMS_TOP_N_TRAIN: 2000
18 |     POST_NMS_TOP_N_TEST: 200
19 |     RPN_HEAD: FBNet.rpn_head
20 |   ROI_HEADS:
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 6
24 |     FEATURE_EXTRACTOR: FBNet.roi_head
25 |     NUM_CLASSES: 81
26 | DATASETS:
27 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 |   TEST: ("coco_2014_minival",)
29 | SOLVER:
30 |   BASE_LR: 0.06
31 |   WARMUP_FACTOR: 0.1
32 |   WEIGHT_DECAY: 0.0001
33 |   STEPS: (60000, 80000)
34 |   MAX_ITER: 90000
35 |   IMS_PER_BATCH: 128  # for 8GPUs
36 | # TEST:
37 | #   IMS_PER_BATCH: 8
38 | INPUT:
39 |   MIN_SIZE_TRAIN: (600, )
40 |   MAX_SIZE_TRAIN: 1000
41 |   MIN_SIZE_TEST: 600
42 |   MAX_SIZE_TEST: 1000
43 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
44 |   PIXEL_STD: [57.375, 57.12, 58.395]
45 | 


--------------------------------------------------------------------------------
/configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "cham_v1a"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |   RPN:
12 |     ANCHOR_SIZES: (32, 64, 128, 256, 512)
13 |     ANCHOR_STRIDE: (16, )
14 |     BATCH_SIZE_PER_IMAGE: 256
15 |     PRE_NMS_TOP_N_TRAIN: 6000
16 |     PRE_NMS_TOP_N_TEST: 6000
17 |     POST_NMS_TOP_N_TRAIN: 2000
18 |     POST_NMS_TOP_N_TEST: 200
19 |     RPN_HEAD: FBNet.rpn_head
20 |   ROI_HEADS:
21 |     BATCH_SIZE_PER_IMAGE: 128
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 6
24 |     FEATURE_EXTRACTOR: FBNet.roi_head
25 |     NUM_CLASSES: 81
26 | DATASETS:
27 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 |   TEST: ("coco_2014_minival",)
29 | SOLVER:
30 |   BASE_LR: 0.045
31 |   WARMUP_FACTOR: 0.1
32 |   WEIGHT_DECAY: 0.0001
33 |   STEPS: (90000, 120000)
34 |   MAX_ITER: 135000
35 |   IMS_PER_BATCH: 96  # for 8GPUs
36 | # TEST:
37 | #   IMS_PER_BATCH: 8
38 | INPUT:
39 |   MIN_SIZE_TRAIN: (600, )
40 |   MAX_SIZE_TRAIN: 1000
41 |   MIN_SIZE_TEST: 600
42 |   MAX_SIZE_TEST: 1000
43 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
44 |   PIXEL_STD: [57.375, 57.12, 58.395]
45 | 


--------------------------------------------------------------------------------
/configs/e2e_keypoint_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 2
24 |   ROI_KEYPOINT_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
27 |     PREDICTOR: "KeypointRCNNPredictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 56
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   KEYPOINT_ON: True
33 | DATASETS:
34 |   TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",)
35 |   TEST: ("keypoints_coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
38 | DATALOADER:
39 |   SIZE_DIVISIBILITY: 32
40 | SOLVER:
41 |   BASE_LR: 0.02
42 |   WEIGHT_DECAY: 0.0001
43 |   STEPS: (60000, 80000)
44 |   MAX_ITER: 90000
45 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |   ROI_MASK_HEAD:
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 |     PREDICTOR: "MaskRCNNC4Predictor"
27 |     POOLER_RESOLUTION: 14
28 |     POOLER_SAMPLING_RATIO: 2
29 |     RESOLUTION: 28
30 |     SHARE_BOX_FEATURE_EXTRACTOR: False
31 |   MASK_ON: True
32 | DATASETS:
33 |   TRAIN: ("coco_2017_train",)
34 |   TEST: ("coco_2017_test",)
35 | DATALOADER:
36 |   SIZE_DIVISIBILITY: 32
37 | SOLVER:
38 |   BASE_LR: 0.02
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (60000, 80000)
41 |   MAX_ITER: 90000
42 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 |   ROI_MASK_HEAD:
 8 |     PREDICTOR: "MaskRCNNC4Predictor"
 9 |     SHARE_BOX_FEATURE_EXTRACTOR: True
10 |   MASK_ON: True
11 | DATASETS:
12 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
13 |   TEST: ("coco_2014_minival",)
14 | SOLVER:
15 |   BASE_LR: 0.01
16 |   WEIGHT_DECAY: 0.0001
17 |   STEPS: (120000, 160000)
18 |   MAX_ITER: 180000
19 |   IMS_PER_BATCH: 8
20 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |   ROI_MASK_HEAD:
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 |     PREDICTOR: "MaskRCNNC4Predictor"
27 |     POOLER_RESOLUTION: 14
28 |     POOLER_SAMPLING_RATIO: 2
29 |     RESOLUTION: 28
30 |     SHARE_BOX_FEATURE_EXTRACTOR: False
31 |   MASK_ON: True
32 | DATASETS:
33 |   TRAIN: ("coco_2017_train",)
34 |   TEST: ("coco_2017_val",)
35 | DATALOADER:
36 |   SIZE_DIVISIBILITY: 32
37 | SOLVER:
38 |   BASE_LR: 0.02
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (60000, 80000)
41 |   MAX_ITER: 90000
42 | #OUTPUT_DIR: "/scratch_new/engs1870/maskrcnn/e2e_mask_rcnn_R_50_FPN_1x"
43 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |   ROI_BOX_HEAD:
21 |     POOLER_RESOLUTION: 7
22 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 |     POOLER_SAMPLING_RATIO: 2
24 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 |     PREDICTOR: "FPNPredictor"
26 |   ROI_MASK_HEAD:
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
29 |     PREDICTOR: "MaskRCNNC4Predictor"
30 |     POOLER_RESOLUTION: 14
31 |     POOLER_SAMPLING_RATIO: 2
32 |     RESOLUTION: 28
33 |     SHARE_BOX_FEATURE_EXTRACTOR: False
34 |   MASK_ON: True
35 | DATASETS:
36 |   TRAIN: ("coco_2017_train",)
37 |   TEST: ("coco_2017_test",)
38 | DATALOADER:
39 |   SIZE_DIVISIBILITY: 32
40 | SOLVER:
41 |   BASE_LR: 0.01
42 |   WEIGHT_DECAY: 0.0001
43 |   STEPS: (120000, 160000)
44 |   MAX_ITER: 180000
45 |   IMS_PER_BATCH: 8
46 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_fbnet.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "default"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |     DET_HEAD_LAST_SCALE: 0.0
12 |   RPN:
13 |     ANCHOR_SIZES: (16, 32, 64, 128, 256)
14 |     ANCHOR_STRIDE: (16, )
15 |     BATCH_SIZE_PER_IMAGE: 256
16 |     PRE_NMS_TOP_N_TRAIN: 6000
17 |     PRE_NMS_TOP_N_TEST: 6000
18 |     POST_NMS_TOP_N_TRAIN: 2000
19 |     POST_NMS_TOP_N_TEST: 100
20 |     RPN_HEAD: FBNet.rpn_head
21 |   ROI_HEADS:
22 |     BATCH_SIZE_PER_IMAGE: 256
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 6
25 |     FEATURE_EXTRACTOR: FBNet.roi_head
26 |     NUM_CLASSES: 81
27 |   ROI_MASK_HEAD:
28 |     POOLER_RESOLUTION: 6
29 |     FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 |     PREDICTOR: "MaskRCNNConv1x1Predictor"
31 |     RESOLUTION: 12
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   MASK_ON: True
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | SOLVER:
38 |   BASE_LR: 0.06
39 |   WARMUP_FACTOR: 0.1
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (60000, 80000)
42 |   MAX_ITER: 90000
43 |   IMS_PER_BATCH: 128  # for 8 GPUs
44 | # TEST:
45 | #   IMS_PER_BATCH: 8
46 | INPUT:
47 |   MIN_SIZE_TRAIN: (320, )
48 |   MAX_SIZE_TRAIN: 640
49 |   MIN_SIZE_TEST: 320
50 |   MAX_SIZE_TEST: 640
51 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
52 |   PIXEL_STD: [57.375, 57.12, 58.395]
53 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_fbnet_600.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "default"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |     DET_HEAD_LAST_SCALE: 0.0
12 |   RPN:
13 |     ANCHOR_SIZES: (32, 64, 128, 256, 512)
14 |     ANCHOR_STRIDE: (16, )
15 |     BATCH_SIZE_PER_IMAGE: 256
16 |     PRE_NMS_TOP_N_TRAIN: 6000
17 |     PRE_NMS_TOP_N_TEST: 6000
18 |     POST_NMS_TOP_N_TRAIN: 2000
19 |     POST_NMS_TOP_N_TEST: 200
20 |     RPN_HEAD: FBNet.rpn_head
21 |   ROI_HEADS:
22 |     BATCH_SIZE_PER_IMAGE: 256
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 6
25 |     FEATURE_EXTRACTOR: FBNet.roi_head
26 |     NUM_CLASSES: 81
27 |   ROI_MASK_HEAD:
28 |     POOLER_RESOLUTION: 6
29 |     FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 |     PREDICTOR: "MaskRCNNConv1x1Predictor"
31 |     RESOLUTION: 12
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   MASK_ON: True
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | SOLVER:
38 |   BASE_LR: 0.06
39 |   WARMUP_FACTOR: 0.1
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (60000, 80000)
42 |   MAX_ITER: 90000
43 |   IMS_PER_BATCH: 128  # for 8 GPUs
44 | # TEST:
45 | #   IMS_PER_BATCH: 8
46 | INPUT:
47 |   MIN_SIZE_TRAIN: (600, )
48 |   MAX_SIZE_TRAIN: 1000
49 |   MIN_SIZE_TEST: 600
50 |   MAX_SIZE_TEST: 1000
51 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
52 |   PIXEL_STD: [57.375, 57.12, 58.395]
53 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "xirb16d_dsmask"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |     DET_HEAD_LAST_SCALE: -1.0
12 |   RPN:
13 |     ANCHOR_SIZES: (16, 32, 64, 128, 256)
14 |     ANCHOR_STRIDE: (16, )
15 |     BATCH_SIZE_PER_IMAGE: 256
16 |     PRE_NMS_TOP_N_TRAIN: 6000
17 |     PRE_NMS_TOP_N_TEST: 6000
18 |     POST_NMS_TOP_N_TRAIN: 2000
19 |     POST_NMS_TOP_N_TEST: 100
20 |     RPN_HEAD: FBNet.rpn_head
21 |   ROI_HEADS:
22 |     BATCH_SIZE_PER_IMAGE: 512
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 6
25 |     FEATURE_EXTRACTOR: FBNet.roi_head
26 |     NUM_CLASSES: 81
27 |   ROI_MASK_HEAD:
28 |     POOLER_RESOLUTION: 6
29 |     FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 |     PREDICTOR: "MaskRCNNConv1x1Predictor"
31 |     RESOLUTION: 12
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   MASK_ON: True
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | SOLVER:
38 |   BASE_LR: 0.06
39 |   WARMUP_FACTOR: 0.1
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (60000, 80000)
42 |   MAX_ITER: 90000
43 |   IMS_PER_BATCH: 128  # for 8 GPUs
44 | # TEST:
45 | #   IMS_PER_BATCH: 8
46 | INPUT:
47 |   MIN_SIZE_TRAIN: (320, )
48 |   MAX_SIZE_TRAIN: 640
49 |   MIN_SIZE_TEST: 320
50 |   MAX_SIZE_TEST: 640
51 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
52 |   PIXEL_STD: [57.375, 57.12, 58.395]
53 | 


--------------------------------------------------------------------------------
/configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     CONV_BODY: FBNet
 5 |   FBNET:
 6 |     ARCH: "xirb16d_dsmask"
 7 |     BN_TYPE: "bn"
 8 |     WIDTH_DIVISOR: 8
 9 |     DW_CONV_SKIP_BN: True
10 |     DW_CONV_SKIP_RELU: True
11 |     DET_HEAD_LAST_SCALE: 0.0
12 |   RPN:
13 |     ANCHOR_SIZES: (32, 64, 128, 256, 512)
14 |     ANCHOR_STRIDE: (16, )
15 |     BATCH_SIZE_PER_IMAGE: 256
16 |     PRE_NMS_TOP_N_TRAIN: 6000
17 |     PRE_NMS_TOP_N_TEST: 6000
18 |     POST_NMS_TOP_N_TRAIN: 2000
19 |     POST_NMS_TOP_N_TEST: 200
20 |     RPN_HEAD: FBNet.rpn_head
21 |   ROI_HEADS:
22 |     BATCH_SIZE_PER_IMAGE: 256
23 |   ROI_BOX_HEAD:
24 |     POOLER_RESOLUTION: 6
25 |     FEATURE_EXTRACTOR: FBNet.roi_head
26 |     NUM_CLASSES: 81
27 |   ROI_MASK_HEAD:
28 |     POOLER_RESOLUTION: 6
29 |     FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 |     PREDICTOR: "MaskRCNNConv1x1Predictor"
31 |     RESOLUTION: 12
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   MASK_ON: True
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | SOLVER:
38 |   BASE_LR: 0.06
39 |   WARMUP_FACTOR: 0.1
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (60000, 80000)
42 |   MAX_ITER: 90000
43 |   IMS_PER_BATCH: 128  # for 8 GPUs
44 | # TEST:
45 | #   IMS_PER_BATCH: 8
46 | INPUT:
47 |   MIN_SIZE_TRAIN: (600, )
48 |   MAX_SIZE_TRAIN: 1000
49 |   MIN_SIZE_TEST: 600
50 |   MAX_SIZE_TEST: 1000
51 |   PIXEL_MEAN: [103.53, 116.28, 123.675]
52 |   PIXEL_STD: [57.375, 57.12, 58.395]
53 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/README.md:
--------------------------------------------------------------------------------
 1 | ### Group Normalization
 2 | 1. [Group Normalization](https://arxiv.org/abs/1803.08494)  
 3 | 2. [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883)  
 4 | 3. [Official Detectron GN code](https://github.com/facebookresearch/Detectron/blob/master/projects/GN/README.md)  
 5 | 
 6 | 
 7 | ### Performance
 8 | |      Case                  |    Type      |  lr schd  |  im/gpu | bbox AP | mask AP |
 9 | |----------------------------|:------------:|:---------:|:-------:|:-------:|:-------:|
10 | |   R-50-FPN, GN (paper)     | finetune     |    2x     |   2     |   40.3  |  35.7   |
11 | |   R-50-FPN, GN (this repo) | finetune     |    2x     |   2     |   40.2  |  36.0   |
12 | |   R-50-FPN, GN (paper)     | from scratch |    3x     |   2     |   39.5  |  35.2   |
13 | |   R-50-FPN, GN (this repo) | from scratch |    3x     |   2     |   38.9  |  35.1   |
14 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS: # use GN for backbone
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STRIDE_IN_1X1: False
14 |     TRANS_FUNC: "BottleneckWithGN"
15 |     STEM_FUNC: "StemWithGN"
16 |   FPN:
17 |     USE_GN: True # use GN for FPN
18 |   RPN:
19 |     USE_FPN: True
20 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 |     PRE_NMS_TOP_N_TRAIN: 2000
22 |     PRE_NMS_TOP_N_TEST: 1000
23 |     POST_NMS_TOP_N_TEST: 1000
24 |     FPN_POST_NMS_TOP_N_TEST: 1000
25 |   ROI_HEADS:
26 |     USE_FPN: True
27 |     BATCH_SIZE_PER_IMAGE: 512
28 |     POSITIVE_FRACTION: 0.25
29 |   ROI_BOX_HEAD:
30 |     USE_GN: True # use GN for bbox head
31 |     POOLER_RESOLUTION: 7
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     POOLER_SAMPLING_RATIO: 2
34 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
35 |     PREDICTOR: "FPNPredictor"
36 | DATASETS:
37 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
38 |   TEST: ("coco_2014_minival",)
39 | DATALOADER:
40 |   SIZE_DIVISIBILITY: 32
41 | SOLVER:
42 |   # Assume 8 gpus
43 |   BASE_LR: 0.02
44 |   WEIGHT_DECAY: 0.0001
45 |   STEPS: (60000, 80000)
46 |   MAX_ITER: 90000
47 |   IMS_PER_BATCH: 16
48 | TEST:
49 |   IMS_PER_BATCH: 8
50 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS: # use GN for backbone
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STRIDE_IN_1X1: False
14 |     TRANS_FUNC: "BottleneckWithGN"
15 |     STEM_FUNC: "StemWithGN"
16 |   FPN:
17 |     USE_GN: True # use GN for FPN
18 |   RPN:
19 |     USE_FPN: True
20 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 |     PRE_NMS_TOP_N_TRAIN: 2000
22 |     PRE_NMS_TOP_N_TEST: 1000
23 |     POST_NMS_TOP_N_TEST: 1000
24 |     FPN_POST_NMS_TOP_N_TEST: 1000
25 |   ROI_HEADS:
26 |     USE_FPN: True
27 |     BATCH_SIZE_PER_IMAGE: 512
28 |     POSITIVE_FRACTION: 0.25
29 |   ROI_BOX_HEAD:
30 |     USE_GN: True # use GN for bbox head
31 |     POOLER_RESOLUTION: 7
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     POOLER_SAMPLING_RATIO: 2
34 |     CONV_HEAD_DIM: 256
35 |     NUM_STACKED_CONVS: 4
36 |     FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
37 |     PREDICTOR: "FPNPredictor"
38 | DATASETS:
39 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
40 |   TEST: ("coco_2014_minival",)
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assume 8 gpus
45 |   BASE_LR: 0.02
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (60000, 80000)
48 |   MAX_ITER: 90000
49 |   IMS_PER_BATCH: 16
50 | TEST:
51 |   IMS_PER_BATCH: 8
52 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_1x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS: # use GN for backbone
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STRIDE_IN_1X1: False
14 |     TRANS_FUNC: "BottleneckWithGN"
15 |     STEM_FUNC: "StemWithGN"
16 |   FPN:
17 |     USE_GN: True # use GN for FPN
18 |   RPN:
19 |     USE_FPN: True
20 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 |     PRE_NMS_TOP_N_TRAIN: 2000
22 |     PRE_NMS_TOP_N_TEST: 1000
23 |     POST_NMS_TOP_N_TEST: 1000
24 |     FPN_POST_NMS_TOP_N_TEST: 1000
25 |   ROI_HEADS:
26 |     USE_FPN: True
27 |     BATCH_SIZE_PER_IMAGE: 512
28 |     POSITIVE_FRACTION: 0.25
29 |   ROI_BOX_HEAD:
30 |     USE_GN: True # use GN for bbox head
31 |     POOLER_RESOLUTION: 7
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     POOLER_SAMPLING_RATIO: 2
34 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
35 |     PREDICTOR: "FPNPredictor"
36 |   ROI_MASK_HEAD:
37 |     USE_GN: True # use GN for mask head
38 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
39 |     CONV_LAYERS: (256, 256, 256, 256)
40 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
41 |     PREDICTOR: "MaskRCNNC4Predictor"
42 |     POOLER_RESOLUTION: 14
43 |     POOLER_SAMPLING_RATIO: 2
44 |     RESOLUTION: 28
45 |     SHARE_BOX_FEATURE_EXTRACTOR: False
46 |   MASK_ON: True
47 | DATASETS:
48 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
49 |   TEST: ("coco_2014_minival",)
50 | DATALOADER:
51 |   SIZE_DIVISIBILITY: 32
52 | SOLVER:
53 |   # Assume 8 gpus
54 |   BASE_LR: 0.02
55 |   WEIGHT_DECAY: 0.0001
56 |   STEPS: (60000, 80000)
57 |   MAX_ITER: 90000
58 |   IMS_PER_BATCH: 16
59 | TEST:
60 |   IMS_PER_BATCH: 8
61 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |   RESNETS: # use GN for backbone
12 |     BACKBONE_OUT_CHANNELS: 256
13 |     STRIDE_IN_1X1: False
14 |     TRANS_FUNC: "BottleneckWithGN"
15 |     STEM_FUNC: "StemWithGN"
16 |   FPN:
17 |     USE_GN: True # use GN for FPN
18 |   RPN:
19 |     USE_FPN: True
20 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 |     PRE_NMS_TOP_N_TRAIN: 2000
22 |     PRE_NMS_TOP_N_TEST: 1000
23 |     POST_NMS_TOP_N_TEST: 1000
24 |     FPN_POST_NMS_TOP_N_TEST: 1000
25 |   ROI_HEADS:
26 |     USE_FPN: True
27 |     BATCH_SIZE_PER_IMAGE: 512
28 |     POSITIVE_FRACTION: 0.25
29 |   ROI_BOX_HEAD:
30 |     USE_GN: True # use GN for bbox head
31 |     POOLER_RESOLUTION: 7
32 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 |     POOLER_SAMPLING_RATIO: 2
34 |     CONV_HEAD_DIM: 256
35 |     NUM_STACKED_CONVS: 4
36 |     FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
37 |     PREDICTOR: "FPNPredictor"
38 |   ROI_MASK_HEAD:
39 |     USE_GN: True # use GN for mask head
40 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
41 |     CONV_LAYERS: (256, 256, 256, 256)
42 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
43 |     PREDICTOR: "MaskRCNNC4Predictor"
44 |     POOLER_RESOLUTION: 14
45 |     POOLER_SAMPLING_RATIO: 2
46 |     RESOLUTION: 28
47 |     SHARE_BOX_FEATURE_EXTRACTOR: False
48 |   MASK_ON: True
49 | DATASETS:
50 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
51 |   TEST: ("coco_2014_minival",)
52 | DATALOADER:
53 |   SIZE_DIVISIBILITY: 32
54 | SOLVER:
55 |   # Assume 8 gpus
56 |   BASE_LR: 0.02
57 |   WEIGHT_DECAY: 0.0001
58 |   STEPS: (60000, 80000)
59 |   MAX_ITER: 90000
60 |   IMS_PER_BATCH: 16
61 | TEST:
62 |   IMS_PER_BATCH: 8
63 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "" # no pretrained model
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |     FREEZE_CONV_BODY_AT: 0 # freeze nothing; train all backbone layers
12 |   RESNETS: # use GN for backbone
13 |     BACKBONE_OUT_CHANNELS: 256
14 |     STRIDE_IN_1X1: False
15 |     TRANS_FUNC: "BottleneckWithGN"
16 |     STEM_FUNC: "StemWithGN"
17 |   FPN:
18 |     USE_GN: True # use GN for FPN
19 |   RPN:
20 |     USE_FPN: True
21 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 |     PRE_NMS_TOP_N_TRAIN: 2000
23 |     PRE_NMS_TOP_N_TEST: 1000
24 |     POST_NMS_TOP_N_TEST: 1000
25 |     FPN_POST_NMS_TOP_N_TEST: 1000
26 |   ROI_HEADS:
27 |     USE_FPN: True
28 |     BATCH_SIZE_PER_IMAGE: 512
29 |     POSITIVE_FRACTION: 0.25
30 |   ROI_BOX_HEAD:
31 |     USE_GN: True # use GN for bbox head
32 |     POOLER_RESOLUTION: 7
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     POOLER_SAMPLING_RATIO: 2
35 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
36 |     PREDICTOR: "FPNPredictor"
37 | DATASETS:
38 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
39 |   TEST: ("coco_2014_minival",)
40 | DATALOADER:
41 |   SIZE_DIVISIBILITY: 32
42 | SOLVER:
43 |   # Assume 8 gpus
44 |   BASE_LR: 0.02
45 |   WEIGHT_DECAY: 0.0001
46 |   STEPS: (210000, 250000)
47 |   MAX_ITER: 270000
48 |   IMS_PER_BATCH: 16
49 | TEST:
50 |   IMS_PER_BATCH: 8
51 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "" # no pretrained model
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |     FREEZE_CONV_BODY_AT: 0 # freeze nothing; train all backbone layers
12 |   RESNETS: # use GN for backbone
13 |     BACKBONE_OUT_CHANNELS: 256
14 |     STRIDE_IN_1X1: False
15 |     TRANS_FUNC: "BottleneckWithGN"
16 |     STEM_FUNC: "StemWithGN"
17 |   FPN:
18 |     USE_GN: True # use GN for FPN
19 |   RPN:
20 |     USE_FPN: True
21 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 |     PRE_NMS_TOP_N_TRAIN: 2000
23 |     PRE_NMS_TOP_N_TEST: 1000
24 |     POST_NMS_TOP_N_TEST: 1000
25 |     FPN_POST_NMS_TOP_N_TEST: 1000
26 |   ROI_HEADS:
27 |     USE_FPN: True
28 |     BATCH_SIZE_PER_IMAGE: 512
29 |     POSITIVE_FRACTION: 0.25
30 |   ROI_BOX_HEAD:
31 |     USE_GN: True # use GN for bbox head
32 |     POOLER_RESOLUTION: 7
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     POOLER_SAMPLING_RATIO: 2
35 |     CONV_HEAD_DIM: 256
36 |     NUM_STACKED_CONVS: 4
37 |     FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
38 |     PREDICTOR: "FPNPredictor"
39 | DATASETS:
40 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
41 |   TEST: ("coco_2014_minival",)
42 | DATALOADER:
43 |   SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 |   # Assume 8 gpus
46 |   BASE_LR: 0.02
47 |   WEIGHT_DECAY: 0.0001
48 |   STEPS: (210000, 250000)
49 |   MAX_ITER: 270000
50 |   IMS_PER_BATCH: 16
51 | TEST:
52 |   IMS_PER_BATCH: 8
53 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "" # no pretrained model
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |     FREEZE_CONV_BODY_AT: 0 # freeze nothing; train all backbone layers
12 |   RESNETS: # use GN for backbone
13 |     BACKBONE_OUT_CHANNELS: 256
14 |     STRIDE_IN_1X1: False
15 |     TRANS_FUNC: "BottleneckWithGN"
16 |     STEM_FUNC: "StemWithGN"
17 |   FPN:
18 |     USE_GN: True # use GN for FPN
19 |   RPN:
20 |     USE_FPN: True
21 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 |     PRE_NMS_TOP_N_TRAIN: 2000
23 |     PRE_NMS_TOP_N_TEST: 1000
24 |     POST_NMS_TOP_N_TEST: 1000
25 |     FPN_POST_NMS_TOP_N_TEST: 1000
26 |   ROI_HEADS:
27 |     USE_FPN: True
28 |     BATCH_SIZE_PER_IMAGE: 512
29 |     POSITIVE_FRACTION: 0.25
30 |   ROI_BOX_HEAD:
31 |     USE_GN: True # use GN for bbox head
32 |     POOLER_RESOLUTION: 7
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     POOLER_SAMPLING_RATIO: 2
35 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
36 |     PREDICTOR: "FPNPredictor"
37 |   ROI_MASK_HEAD:
38 |     USE_GN: True # use GN for mask head
39 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
40 |     CONV_LAYERS: (256, 256, 256, 256)
41 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
42 |     PREDICTOR: "MaskRCNNC4Predictor"
43 |     POOLER_RESOLUTION: 14
44 |     POOLER_SAMPLING_RATIO: 2
45 |     RESOLUTION: 28
46 |     SHARE_BOX_FEATURE_EXTRACTOR: False
47 |   MASK_ON: True
48 | DATASETS:
49 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
50 |   TEST: ("coco_2014_minival",)
51 | DATALOADER:
52 |   SIZE_DIVISIBILITY: 32
53 | SOLVER:
54 |   # Assume 8 gpus
55 |   BASE_LR: 0.02
56 |   WEIGHT_DECAY: 0.0001
57 |   STEPS: (210000, 250000)
58 |   MAX_ITER: 270000
59 |   IMS_PER_BATCH: 16
60 | TEST:
61 |   IMS_PER_BATCH: 8
62 | 


--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | INPUT:
 2 |   MIN_SIZE_TRAIN: (800,)
 3 |   MAX_SIZE_TRAIN: 1333
 4 |   MIN_SIZE_TEST: 800
 5 |   MAX_SIZE_TEST: 1333
 6 | MODEL:
 7 |   META_ARCHITECTURE: "GeneralizedRCNN"
 8 |   WEIGHT: "" # no pretrained model
 9 |   BACKBONE:
10 |     CONV_BODY: "R-50-FPN"
11 |     FREEZE_CONV_BODY_AT: 0 # freeze nothing; train all backbone layers
12 |   RESNETS: # use GN for backbone
13 |     BACKBONE_OUT_CHANNELS: 256
14 |     STRIDE_IN_1X1: False
15 |     TRANS_FUNC: "BottleneckWithGN"
16 |     STEM_FUNC: "StemWithGN"
17 |   FPN:
18 |     USE_GN: True # use GN for FPN
19 |   RPN:
20 |     USE_FPN: True
21 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 |     PRE_NMS_TOP_N_TRAIN: 2000
23 |     PRE_NMS_TOP_N_TEST: 1000
24 |     POST_NMS_TOP_N_TEST: 1000
25 |     FPN_POST_NMS_TOP_N_TEST: 1000
26 |   ROI_HEADS:
27 |     USE_FPN: True
28 |     BATCH_SIZE_PER_IMAGE: 512
29 |     POSITIVE_FRACTION: 0.25
30 |   ROI_BOX_HEAD:
31 |     USE_GN: True # use GN for bbox head
32 |     POOLER_RESOLUTION: 7
33 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 |     POOLER_SAMPLING_RATIO: 2
35 |     CONV_HEAD_DIM: 256
36 |     NUM_STACKED_CONVS: 4
37 |     FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
38 |     PREDICTOR: "FPNPredictor"
39 |   ROI_MASK_HEAD:
40 |     USE_GN: True # use GN for mask head
41 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
42 |     CONV_LAYERS: (256, 256, 256, 256)
43 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
44 |     PREDICTOR: "MaskRCNNC4Predictor"
45 |     POOLER_RESOLUTION: 14
46 |     POOLER_SAMPLING_RATIO: 2
47 |     RESOLUTION: 28
48 |     SHARE_BOX_FEATURE_EXTRACTOR: False
49 |   MASK_ON: True
50 | DATASETS:
51 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
52 |   TEST: ("coco_2014_minival",)
53 | DATALOADER:
54 |   SIZE_DIVISIBILITY: 32
55 | SOLVER:
56 |   # Assume 8 gpus
57 |   BASE_LR: 0.02
58 |   WEIGHT_DECAY: 0.0001
59 |   STEPS: (210000, 250000)
60 |   MAX_ITER: 270000
61 |   IMS_PER_BATCH: 16
62 | TEST:
63 |   IMS_PER_BATCH: 8
64 | 


--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 300
 7 |     ANCHOR_SIZES: (128, 256, 512)
 8 |   ROI_BOX_HEAD:
 9 |     NUM_CLASSES: 21
10 | DATASETS:
11 |   TRAIN: ("voc_2007_train", "voc_2007_val")
12 |   TEST: ("voc_2007_test",)
13 | SOLVER:
14 |   BASE_LR: 0.001
15 |   WEIGHT_DECAY: 0.0001
16 |   STEPS: (50000, )
17 |   MAX_ITER: 70000
18 |   IMS_PER_BATCH: 1
19 | TEST:
20 |   IMS_PER_BATCH: 1
21 | 


--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 300
 7 |     ANCHOR_SIZES: (128, 256, 512)
 8 |   ROI_BOX_HEAD:
 9 |     NUM_CLASSES: 21
10 | DATASETS:
11 |   TRAIN: ("voc_2007_train", "voc_2007_val")
12 |   TEST: ("voc_2007_test",)
13 | SOLVER:
14 |   BASE_LR: 0.004
15 |   WEIGHT_DECAY: 0.0001
16 |   STEPS: (12500, )
17 |   MAX_ITER: 17500
18 |   IMS_PER_BATCH: 4
19 | TEST:
20 |   IMS_PER_BATCH: 4
21 | 


--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |   ROI_BOX_HEAD:
18 |     POOLER_RESOLUTION: 7
19 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 |     POOLER_SAMPLING_RATIO: 2
21 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 |     PREDICTOR: "FPNPredictor"
23 |     NUM_CLASSES: 21
24 |   ROI_MASK_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
27 |     PREDICTOR: "MaskRCNNC4Predictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 28
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   MASK_ON: True
33 | DATASETS:
34 |   TRAIN: ("voc_2012_train_cocostyle",)
35 |   TEST: ("voc_2012_val_cocostyle",)
36 | DATALOADER:
37 |   SIZE_DIVISIBILITY: 32
38 | SOLVER:
39 |   BASE_LR: 0.01
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (18000,)
42 |   MAX_ITER: 24000
43 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 |   ROI_HEADS:
 8 |     BATCH_SIZE_PER_IMAGE: 256
 9 | DATASETS:
10 |   TRAIN: ("coco_2014_minival",)
11 |   TEST: ("coco_2014_minival",)
12 | INPUT:
13 |   MIN_SIZE_TRAIN: (600,)
14 |   MAX_SIZE_TRAIN: 1000
15 |   MIN_SIZE_TEST: 800
16 |   MAX_SIZE_TEST: 1000
17 | SOLVER:
18 |   BASE_LR: 0.005
19 |   WEIGHT_DECAY: 0.0001
20 |   STEPS: (1500,)
21 |   MAX_ITER: 2000
22 |   IMS_PER_BATCH: 2
23 | TEST:
24 |   IMS_PER_BATCH: 2
25 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |     BATCH_SIZE_PER_IMAGE: 256
18 |   ROI_BOX_HEAD:
19 |     POOLER_RESOLUTION: 7
20 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
21 |     POOLER_SAMPLING_RATIO: 2
22 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
23 |     PREDICTOR: "FPNPredictor"
24 | DATASETS:
25 |   TRAIN: ("coco_2014_minival",)
26 |   TEST: ("coco_2014_minival",)
27 | INPUT:
28 |   MIN_SIZE_TRAIN: (600,)
29 |   MAX_SIZE_TRAIN: 1000
30 |   MIN_SIZE_TEST: 800
31 |   MAX_SIZE_TEST: 1000
32 | DATALOADER:
33 |   SIZE_DIVISIBILITY: 32
34 | SOLVER:
35 |   BASE_LR: 0.005
36 |   WEIGHT_DECAY: 0.0001
37 |   STEPS: (1500,)
38 |   MAX_ITER: 2000
39 |   IMS_PER_BATCH: 4
40 | TEST:
41 |   IMS_PER_BATCH: 2
42 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |     BATCH_SIZE_PER_IMAGE: 256
21 |   ROI_BOX_HEAD:
22 |     POOLER_RESOLUTION: 7
23 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
24 |     POOLER_SAMPLING_RATIO: 2
25 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
26 |     PREDICTOR: "FPNPredictor"
27 | DATASETS:
28 |   TRAIN: ("coco_2014_minival",)
29 |   TEST: ("coco_2014_minival",)
30 | INPUT:
31 |   MIN_SIZE_TRAIN: (600,)
32 |   MAX_SIZE_TRAIN: 1000
33 |   MIN_SIZE_TEST: 800
34 |   MAX_SIZE_TEST: 1000
35 | DATALOADER:
36 |   SIZE_DIVISIBILITY: 32
37 | SOLVER:
38 |   BASE_LR: 0.005
39 |   WEIGHT_DECAY: 0.0001
40 |   STEPS: (1500,)
41 |   MAX_ITER: 2000
42 |   IMS_PER_BATCH: 2
43 | TEST:
44 |   IMS_PER_BATCH: 2
45 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_keypoint_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |     BATCH_SIZE_PER_IMAGE: 256
18 |   ROI_BOX_HEAD:
19 |     POOLER_RESOLUTION: 7
20 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
21 |     POOLER_SAMPLING_RATIO: 2
22 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
23 |     PREDICTOR: "FPNPredictor"
24 |     NUM_CLASSES: 2
25 |   ROI_KEYPOINT_HEAD:
26 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
27 |     FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
28 |     PREDICTOR: "KeypointRCNNPredictor"
29 |     POOLER_RESOLUTION: 14
30 |     POOLER_SAMPLING_RATIO: 2
31 |     RESOLUTION: 56
32 |     SHARE_BOX_FEATURE_EXTRACTOR: False
33 |   KEYPOINT_ON: True
34 | DATASETS:
35 |   TRAIN: ("keypoints_coco_2014_minival",)
36 |   TEST: ("keypoints_coco_2014_minival",)
37 | INPUT:
38 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
39 |   MAX_SIZE_TRAIN: 1000
40 |   MIN_SIZE_TEST: 800
41 |   MAX_SIZE_TEST: 1000
42 | DATALOADER:
43 |   SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (1500,)
48 |   MAX_ITER: 2000
49 |   IMS_PER_BATCH: 4
50 | TEST:
51 |   IMS_PER_BATCH: 2
52 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN:
 5 |     PRE_NMS_TOP_N_TEST: 6000
 6 |     POST_NMS_TOP_N_TEST: 1000
 7 |   ROI_HEADS:
 8 |     BATCH_SIZE_PER_IMAGE: 256
 9 |   ROI_MASK_HEAD:
10 |     PREDICTOR: "MaskRCNNC4Predictor"
11 |     SHARE_BOX_FEATURE_EXTRACTOR: True
12 |   MASK_ON: True
13 | DATASETS:
14 |   TRAIN: ("coco_2014_minival",)
15 |   TEST: ("coco_2014_minival",)
16 | INPUT:
17 |   MIN_SIZE_TRAIN: (600,)
18 |   MAX_SIZE_TRAIN: 1000
19 |   MIN_SIZE_TEST: 800
20 |   MAX_SIZE_TEST: 1000
21 | SOLVER:
22 |   BASE_LR: 0.005
23 |   WEIGHT_DECAY: 0.0001
24 |   STEPS: (1500,)
25 |   MAX_ITER: 2000
26 |   IMS_PER_BATCH: 4
27 | TEST:
28 |   IMS_PER_BATCH: 2
29 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-50-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |   RPN:
 9 |     USE_FPN: True
10 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 |     PRE_NMS_TOP_N_TRAIN: 2000
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 1000
14 |     FPN_POST_NMS_TOP_N_TEST: 1000
15 |   ROI_HEADS:
16 |     USE_FPN: True
17 |     BATCH_SIZE_PER_IMAGE: 256
18 |   ROI_BOX_HEAD:
19 |     POOLER_RESOLUTION: 7
20 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
21 |     POOLER_SAMPLING_RATIO: 2
22 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
23 |     PREDICTOR: "FPNPredictor"
24 |   ROI_MASK_HEAD:
25 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
27 |     PREDICTOR: "MaskRCNNC4Predictor"
28 |     POOLER_RESOLUTION: 14
29 |     POOLER_SAMPLING_RATIO: 2
30 |     RESOLUTION: 28
31 |     SHARE_BOX_FEATURE_EXTRACTOR: False
32 |   MASK_ON: True
33 | DATASETS:
34 |   TRAIN: ("coco_2014_minival",)
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (600,)
38 |   MAX_SIZE_TRAIN: 1000
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1000
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   BASE_LR: 0.005
45 |   WEIGHT_DECAY: 0.0001
46 |   STEPS: (1500,)
47 |   MAX_ITER: 2000
48 |   IMS_PER_BATCH: 4
49 | TEST:
50 |   IMS_PER_BATCH: 2
51 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   BACKBONE:
 5 |     CONV_BODY: "R-101-FPN"
 6 |   RESNETS:
 7 |     BACKBONE_OUT_CHANNELS: 256
 8 |     STRIDE_IN_1X1: False
 9 |     NUM_GROUPS: 32
10 |     WIDTH_PER_GROUP: 8
11 |   RPN:
12 |     USE_FPN: True
13 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 |     PRE_NMS_TOP_N_TRAIN: 2000
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 1000
17 |     FPN_POST_NMS_TOP_N_TEST: 1000
18 |   ROI_HEADS:
19 |     USE_FPN: True
20 |     BATCH_SIZE_PER_IMAGE: 256
21 |   ROI_BOX_HEAD:
22 |     POOLER_RESOLUTION: 7
23 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
24 |     POOLER_SAMPLING_RATIO: 2
25 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
26 |     PREDICTOR: "FPNPredictor"
27 |   ROI_MASK_HEAD:
28 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
29 |     FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
30 |     PREDICTOR: "MaskRCNNC4Predictor"
31 |     POOLER_RESOLUTION: 14
32 |     POOLER_SAMPLING_RATIO: 2
33 |     RESOLUTION: 28
34 |     SHARE_BOX_FEATURE_EXTRACTOR: False
35 |   MASK_ON: True
36 | DATASETS:
37 |   TRAIN: ("coco_2014_minival",)
38 |   TEST: ("coco_2014_minival",)
39 | INPUT:
40 |   MIN_SIZE_TRAIN: (600,)
41 |   MAX_SIZE_TRAIN: 1000
42 |   MIN_SIZE_TEST: 800
43 |   MAX_SIZE_TEST: 1000
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 |   BASE_LR: 0.005
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (1500,)
50 |   MAX_ITER: 2000
51 |   IMS_PER_BATCH: 2
52 | TEST:
53 |   IMS_PER_BATCH: 2
54 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RPN:
 6 |     PRE_NMS_TOP_N_TEST: 12000
 7 |     POST_NMS_TOP_N_TEST: 2000
 8 | DATASETS:
 9 |   TRAIN: ("coco_2014_minival",)
10 |   TEST: ("coco_2014_minival",)
11 | INPUT:
12 |   MIN_SIZE_TRAIN: (600,)
13 |   MAX_SIZE_TRAIN: 1000
14 |   MIN_SIZE_TEST: 800
15 |   MAX_SIZE_TEST: 1000
16 | SOLVER:
17 |   BASE_LR: 0.005
18 |   WEIGHT_DECAY: 0.0001
19 |   STEPS: (1500,)
20 |   MAX_ITER: 2000
21 |   IMS_PER_BATCH: 4
22 | TEST:
23 |   IMS_PER_BATCH: 2
24 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   BACKBONE:
 6 |     CONV_BODY: "R-50-FPN"
 7 |   RESNETS:
 8 |     BACKBONE_OUT_CHANNELS: 256
 9 |   RPN:
10 |     USE_FPN: True
11 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 2000
14 |     FPN_POST_NMS_TOP_N_TEST: 2000
15 | DATASETS:
16 |   TRAIN: ("coco_2014_minival",)
17 |   TEST: ("coco_2014_minival",)
18 | INPUT:
19 |   MIN_SIZE_TRAIN: (600,)
20 |   MAX_SIZE_TRAIN: 1000
21 |   MIN_SIZE_TEST: 800
22 |   MAX_SIZE_TEST: 1000
23 | DATALOADER:
24 |   SIZE_DIVISIBILITY: 32
25 | SOLVER:
26 |   BASE_LR: 0.005
27 |   WEIGHT_DECAY: 0.0001
28 |   STEPS: (1500,)
29 |   MAX_ITER: 2000
30 |   IMS_PER_BATCH: 4
31 | TEST:
32 |   IMS_PER_BATCH: 2
33 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-101-FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-101-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (800, )
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assumes 4 GPUs
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (120000, 160000)
48 |   MAX_ITER: 180000
49 |   IMS_PER_BATCH: 8
50 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-101-FPN_P5_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-101-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     USE_C5: False
32 |     FG_IOU_THRESHOLD: 0.5
33 |     BG_IOU_THRESHOLD: 0.4
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | INPUT:
38 |   MIN_SIZE_TRAIN: (800, )
39 |   MAX_SIZE_TRAIN: 1333
40 |   MIN_SIZE_TEST: 800
41 |   MAX_SIZE_TEST: 1333
42 | DATALOADER:
43 |   SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 |   # Assumes 4 GPUs
46 |   BASE_LR: 0.005
47 |   WEIGHT_DECAY: 0.0001
48 |   STEPS: (120000, 160000)
49 |   MAX_ITER: 180000
50 |   IMS_PER_BATCH: 8
51 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-50-FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (800,)
38 |   MAX_SIZE_TRAIN: 1333
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   # Assumes 4 GPUs
45 |   BASE_LR: 0.005
46 |   WEIGHT_DECAY: 0.0001
47 |   STEPS: (120000, 160000)
48 |   MAX_ITER: 180000
49 |   IMS_PER_BATCH: 8
50 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-50-FPN_1x_quick.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     FG_IOU_THRESHOLD: 0.5
32 |     BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 |   TRAIN: ("coco_2014_minival",)
35 |   TEST: ("coco_2014_minival",)
36 | INPUT:
37 |   MIN_SIZE_TRAIN: (600,)
38 |   MAX_SIZE_TRAIN: 1000
39 |   MIN_SIZE_TEST: 800
40 |   MAX_SIZE_TEST: 1000
41 | DATALOADER:
42 |   SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 |   BASE_LR: 0.005
45 |   WEIGHT_DECAY: 0.0001
46 |   STEPS: (3500,)
47 |   MAX_ITER: 4000
48 |   IMS_PER_BATCH: 4
49 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-50-FPN_P5_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-50-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |   RPN:
11 |     USE_FPN: True
12 |     FG_IOU_THRESHOLD: 0.5
13 |     BG_IOU_THRESHOLD: 0.4
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TRAIN: 2000
16 |     PRE_NMS_TOP_N_TEST: 1000
17 |     POST_NMS_TOP_N_TEST: 1000
18 |     FPN_POST_NMS_TOP_N_TEST: 1000
19 |   ROI_HEADS:
20 |     USE_FPN: True
21 |     BATCH_SIZE_PER_IMAGE: 256
22 |   ROI_BOX_HEAD:
23 |     POOLER_RESOLUTION: 7
24 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 |     POOLER_SAMPLING_RATIO: 2
26 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 |     PREDICTOR: "FPNPredictor"
28 |   RETINANET:
29 |     SCALES_PER_OCTAVE: 3
30 |     STRADDLE_THRESH: -1
31 |     USE_C5: False
32 |     FG_IOU_THRESHOLD: 0.5
33 |     BG_IOU_THRESHOLD: 0.4
34 | DATASETS:
35 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 |   TEST: ("coco_2014_minival",)
37 | INPUT:
38 |   MIN_SIZE_TRAIN: (800,)
39 |   MAX_SIZE_TRAIN: 1333
40 |   MIN_SIZE_TEST: 800
41 |   MAX_SIZE_TEST: 1333
42 | DATALOADER:
43 |   SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 |   # Assumes 4 GPUs
46 |   BASE_LR: 0.005
47 |   WEIGHT_DECAY: 0.0001
48 |   STEPS: (120000, 160000)
49 |   MAX_ITER: 180000
50 |   IMS_PER_BATCH: 8
51 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   RPN_ONLY: True
 5 |   RETINANET_ON: True
 6 |   BACKBONE:
 7 |     CONV_BODY: "R-101-FPN-RETINANET"
 8 |   RESNETS:
 9 |     BACKBONE_OUT_CHANNELS: 256
10 |     STRIDE_IN_1X1: False
11 |     NUM_GROUPS: 32
12 |     WIDTH_PER_GROUP: 8
13 |   RPN:
14 |     USE_FPN: True
15 |     FG_IOU_THRESHOLD: 0.5
16 |     BG_IOU_THRESHOLD: 0.4
17 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
18 |     PRE_NMS_TOP_N_TRAIN: 2000
19 |     PRE_NMS_TOP_N_TEST: 1000
20 |     POST_NMS_TOP_N_TEST: 1000
21 |     FPN_POST_NMS_TOP_N_TEST: 1000
22 |   ROI_HEADS:
23 |     USE_FPN: True
24 |     BATCH_SIZE_PER_IMAGE: 256
25 |   ROI_BOX_HEAD:
26 |     POOLER_RESOLUTION: 7
27 |     POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 |     POOLER_SAMPLING_RATIO: 2
29 |     FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
30 |     PREDICTOR: "FPNPredictor"
31 |   RETINANET:
32 |     SCALES_PER_OCTAVE: 3
33 |     STRADDLE_THRESH: -1
34 |     FG_IOU_THRESHOLD: 0.5
35 |     BG_IOU_THRESHOLD: 0.4
36 | DATASETS:
37 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
38 |   TEST: ("coco_2014_minival",)
39 | INPUT:
40 |   MIN_SIZE_TRAIN: (800, )
41 |   MAX_SIZE_TRAIN: 1333
42 |   MIN_SIZE_TEST: 800
43 |   MAX_SIZE_TEST: 1333
44 | DATALOADER:
45 |   SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 |   # Assumes 4 GPUs
48 |   BASE_LR: 0.0025
49 |   WEIGHT_DECAY: 0.0001
50 |   STEPS: (240000, 320000)
51 |   MAX_ITER: 360000
52 |   IMS_PER_BATCH: 4
53 | 


--------------------------------------------------------------------------------
/configs/rpn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
 4 |   RPN_ONLY: True
 5 |   BACKBONE:
 6 |     CONV_BODY: "R-101-FPN"
 7 |   RESNETS:
 8 |     BACKBONE_OUT_CHANNELS: 256
 9 |   RPN:
10 |     USE_FPN: True
11 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 2000
14 |     FPN_POST_NMS_TOP_N_TEST: 2000
15 | DATASETS:
16 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
17 |   TEST: ("coco_2014_minival",)
18 | DATALOADER:
19 |   SIZE_DIVISIBILITY: 32
20 | SOLVER:
21 |   BASE_LR: 0.02
22 |   WEIGHT_DECAY: 0.0001
23 |   STEPS: (60000, 80000)
24 |   MAX_ITER: 90000
25 | 


--------------------------------------------------------------------------------
/configs/rpn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   RPN:
 6 |     PRE_NMS_TOP_N_TEST: 12000
 7 |     POST_NMS_TOP_N_TEST: 2000
 8 | DATASETS:
 9 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
10 |   TEST: ("coco_2014_minival",)
11 | SOLVER:
12 |   BASE_LR: 0.02
13 |   WEIGHT_DECAY: 0.0001
14 |   STEPS: (60000, 80000)
15 |   MAX_ITER: 90000
16 | 


--------------------------------------------------------------------------------
/configs/rpn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
 4 |   RPN_ONLY: True
 5 |   BACKBONE:
 6 |     CONV_BODY: "R-50-FPN"
 7 |   RESNETS:
 8 |     BACKBONE_OUT_CHANNELS: 256
 9 |   RPN:
10 |     USE_FPN: True
11 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
12 |     PRE_NMS_TOP_N_TEST: 1000
13 |     POST_NMS_TOP_N_TEST: 2000
14 |     FPN_POST_NMS_TOP_N_TEST: 2000
15 | DATASETS:
16 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
17 |   TEST: ("coco_2014_minival",)
18 | DATALOADER:
19 |   SIZE_DIVISIBILITY: 32
20 | SOLVER:
21 |   BASE_LR: 0.02
22 |   WEIGHT_DECAY: 0.0001
23 |   STEPS: (60000, 80000)
24 |   MAX_ITER: 90000
25 | 


--------------------------------------------------------------------------------
/configs/rpn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
 4 |   RPN_ONLY: True
 5 |   BACKBONE:
 6 |     CONV_BODY: "R-101-FPN"
 7 |   RESNETS:
 8 |     BACKBONE_OUT_CHANNELS: 256
 9 |     STRIDE_IN_1X1: False
10 |     NUM_GROUPS: 32
11 |     WIDTH_PER_GROUP: 8
12 |   RPN:
13 |     USE_FPN: True
14 |     ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 |     PRE_NMS_TOP_N_TEST: 1000
16 |     POST_NMS_TOP_N_TEST: 2000
17 |     FPN_POST_NMS_TOP_N_TEST: 2000
18 | DATASETS:
19 |   TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
20 |   TEST: ("coco_2014_minival",)
21 | DATALOADER:
22 |   SIZE_DIVISIBILITY: 32
23 | SOLVER:
24 |   BASE_LR: 0.02
25 |   WEIGHT_DECAY: 0.0001
26 |   STEPS: (60000, 80000)
27 |   MAX_ITER: 90000
28 | 


--------------------------------------------------------------------------------
/demo/README.md:
--------------------------------------------------------------------------------
 1 | ## Webcam and Jupyter notebook demo
 2 | 
 3 | This folder contains a simple webcam demo that illustrates how you can use `maskrcnn_benchmark` for inference.
 4 | 
 5 | 
 6 | ### With your preferred environment
 7 | 
 8 | You can start it by running it from this folder, using one of the following commands:
 9 | ```bash
10 | # by default, it runs on the GPU
11 | # for best results, use min-image-size 800
12 | python webcam.py --min-image-size 800
13 | # can also run it on the CPU
14 | python webcam.py --min-image-size 300 MODEL.DEVICE cpu
15 | # or change the model that you want to use
16 | python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu
17 | # in order to see the probability heatmaps, pass --show-mask-heatmaps
18 | python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu
19 | ```
20 | 
21 | ### With Docker
22 | 
23 | Build the image with the tag `maskrcnn-benchmark` (see [INSTALL.md](../INSTALL.md) for instructions).
24 | 
25 | Adjust the permissions of the X server host (be careful with this step; see the
26 | [ROS wiki Docker GUI tutorial](http://wiki.ros.org/docker/Tutorials/GUI) for alternatives):
27 | 
28 | ```bash
29 | xhost +
30 | ``` 
31 | 
32 | Then run a container with the demo:
33 |  
34 | ```bash
35 | docker run --rm -it \
36 |     -e DISPLAY=${DISPLAY} \
37 |     --privileged \
38 |     -v /tmp/.X11-unix:/tmp/.X11-unix \
39 |     --device=/dev/video0:/dev/video0 \
40 |     --ipc=host maskrcnn-benchmark \
41 |     python demo/webcam.py --min-image-size 300 \
42 |     --config-file configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml
43 | ```
44 | 
45 | **DISCLAIMER:** *This was tested on an Ubuntu 16.04 machine;
46 | the volume mapping may vary depending on your platform.*
47 | 


--------------------------------------------------------------------------------
/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png


--------------------------------------------------------------------------------
/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png


--------------------------------------------------------------------------------
/demo/test_visualise.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import matplotlib.pylab as pylab
 3 | import os
 4 | import cv2
 5 | 
 6 | from PIL import Image
 7 | import numpy as np
 8 | 
 9 | # this makes our figures bigger
10 | pylab.rcParams['figure.figsize'] = 20, 12
11 | 
12 | from maskrcnn_benchmark.config import cfg
13 | from predictor import COCODemo
14 | 
15 | config_file = "configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml"
16 | 
17 | # update the config options with the config file
18 | cfg.merge_from_file(config_file)
19 | cfg.MODEL.WEIGHT = 'models/e2e_mask_rcnn_X_101_32x8d_FPN_1x/e2e_mask_rcnn_X_101_32x8d_FPN_1x.pth'
20 | # manual override some options
21 | # cfg.merge_from_list(["MODEL.DEVICE", "cpu"])
22 | 
23 | coco_demo = COCODemo(
24 |     cfg,
25 |     min_image_size=800,
26 |     confidence_threshold=0.7,
27 | )
28 | 
29 | # val_path = 'datasets/coco/val2017/'  # this is the validation image data
30 | val_path = '/home/lz/Downloads/sample/'
31 | # output_path = 'models/e2e_mask_rcnn_R_50_FPN_011d_33_g4_14812_sum_re_1x/output/'
32 | output_path = '/home/lz/Downloads/output/'
33 | imglistval = os.listdir(val_path)
34 | for name in imglistval:
35 |     print(name)
36 |     imgfile = val_path + name
37 |     pil_image = Image.open(imgfile).convert("RGB")
38 |     image = np.array(pil_image)[:, :, [2, 1, 0]]
39 | 
40 |     predictions = coco_demo.run_on_opencv_image(image)  # forward predict
41 | 
42 |     savefile = output_path + name
43 | 
44 |     cv2.imwrite(savefile, predictions)
45 | 


--------------------------------------------------------------------------------
/demo/webcam.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import argparse
 3 | import cv2
 4 | 
 5 | from maskrcnn_benchmark.config import cfg
 6 | from predictor import COCODemo
 7 | 
 8 | import time
 9 | 
10 | 
11 | def main():
12 |     parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo")
13 |     parser.add_argument(
14 |         "--config-file",
15 |         default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
16 |         metavar="FILE",
17 |         help="path to config file",
18 |     )
19 |     parser.add_argument(
20 |         "--confidence-threshold",
21 |         type=float,
22 |         default=0.7,
23 |         help="Minimum score for the prediction to be shown",
24 |     )
25 |     parser.add_argument(
26 |         "--min-image-size",
27 |         type=int,
28 |         default=224,
29 |         help="Smallest size of the image to feed to the model. "
30 |             "Model was trained with 800, which gives best results",
31 |     )
32 |     parser.add_argument(
33 |         "--show-mask-heatmaps",
34 |         dest="show_mask_heatmaps",
35 |         help="Show a heatmap probability for the top masks-per-dim masks",
36 |         action="store_true",
37 |     )
38 |     parser.add_argument(
39 |         "--masks-per-dim",
40 |         type=int,
41 |         default=2,
42 |         help="Number of heatmaps per dimension to show",
43 |     )
44 |     parser.add_argument(
45 |         "opts",
46 |         help="Modify model config options using the command-line",
47 |         default=None,
48 |         nargs=argparse.REMAINDER,
49 |     )
50 | 
51 |     args = parser.parse_args()
52 | 
53 |     # load config from file and command-line arguments
54 |     cfg.merge_from_file(args.config_file)
55 |     cfg.merge_from_list(args.opts)
56 |     cfg.freeze()
57 | 
58 |     # prepare object that handles inference plus adds predictions on top of image
59 |     coco_demo = COCODemo(
60 |         cfg,
61 |         confidence_threshold=args.confidence_threshold,
62 |         show_mask_heatmaps=args.show_mask_heatmaps,
63 |         masks_per_dim=args.masks_per_dim,
64 |         min_image_size=args.min_image_size,
65 |     )
66 | 
67 |     cam = cv2.VideoCapture(0)
68 |     while True:
69 |         start_time = time.time()
70 |         ret_val, img = cam.read()
71 |         composite = coco_demo.run_on_opencv_image(img)
72 |         print("Time: {:.2f} s / img".format(time.time() - start_time))
73 |         cv2.imshow("COCO detections", composite)
74 |         if cv2.waitKey(1) == 27:
75 |             break  # esc to quit
76 |     cv2.destroyAllWindows()
77 | 
78 | 
79 | if __name__ == "__main__":
80 |     main()
81 | 


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG CUDA="9.0"
 2 | ARG CUDNN="7"
 3 | 
 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04
 5 | 
 6 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
 7 | 
 8 | # install basics
 9 | RUN apt-get update -y \
10 |  && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++ \
11 |  && apt-get install -y libglib2.0-0 libsm6 libxext6 libxrender-dev
12 | 
13 | # Install Miniconda
14 | RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
15 |  && chmod +x /miniconda.sh \
16 |  && /miniconda.sh -b -p /miniconda \
17 |  && rm /miniconda.sh
18 | 
19 | ENV PATH=/miniconda/bin:$PATH
20 | 
21 | # Create a Python 3.6 environment and make it the default one via the ENV lines below
22 | RUN /miniconda/bin/conda install -y conda-build \
23 |  && /miniconda/bin/conda create -y --name py36 python=3.6.7 \
24 |  && /miniconda/bin/conda clean -ya
25 | 
26 | ENV CONDA_DEFAULT_ENV=py36
27 | ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV
28 | ENV PATH=$CONDA_PREFIX/bin:$PATH
29 | ENV CONDA_AUTO_UPDATE_CONDA=false
30 | 
31 | RUN conda install -y ipython
32 | RUN pip install ninja yacs cython matplotlib opencv-python tqdm
33 | 
34 | # Install PyTorch 1.0 Nightly (ARG CUDA is re-declared: an ARG set before FROM is not visible after it)
35 | ARG CUDA
36 | RUN conda install -y pytorch-nightly cudatoolkit=${CUDA} -c pytorch \
37 |  && conda clean -ya
38 | 
39 | # Install TorchVision master
40 | RUN git clone https://github.com/pytorch/vision.git \
41 |  && cd vision \
42 |  && python setup.py install
43 | 
44 | # install pycocotools
45 | RUN git clone https://github.com/cocodataset/cocoapi.git \
46 |  && cd cocoapi/PythonAPI \
47 |  && python setup.py build_ext install
48 | 
49 | # install apex
50 | RUN git clone https://github.com/NVIDIA/apex.git \
51 |  && cd apex \
52 |  && python setup.py install --cuda_ext --cpp_ext
53 | 
54 | # install PyTorch Detection (maskrcnn-benchmark); FORCE_CUDA is exported to the environment of the build below
55 | ARG FORCE_CUDA="1"
56 | ENV FORCE_CUDA=${FORCE_CUDA}
57 | RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \
58 |  && cd maskrcnn-benchmark \
59 |  && python setup.py build develop
60 | 
61 | WORKDIR /maskrcnn-benchmark
62 | 


--------------------------------------------------------------------------------
/docker/docker-jupyter/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG CUDA="9.0"
 2 | ARG CUDNN="7"
 3 | 
 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04
 5 | 
 6 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
 7 | 
 8 | # install basics
 9 | RUN apt-get update -y \
10 |  && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++
11 | 
12 | # Install Miniconda
13 | RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
14 |  && chmod +x /miniconda.sh \
15 |  && /miniconda.sh -b -p /miniconda \
16 |  && rm /miniconda.sh
17 | 
18 | ENV PATH=/miniconda/bin:$PATH
19 | 
20 | # Create a Python 3.6 environment and make it the default one via the ENV lines below
21 | RUN /miniconda/bin/conda install -y conda-build \
22 |  && /miniconda/bin/conda create -y --name py36 python=3.6.7 \
23 |  && /miniconda/bin/conda clean -ya
24 | 
25 | ENV CONDA_DEFAULT_ENV=py36
26 | ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV
27 | ENV PATH=$CONDA_PREFIX/bin:$PATH
28 | ENV CONDA_AUTO_UPDATE_CONDA=false
29 | 
30 | RUN conda install -y ipython
31 | RUN pip install ninja yacs cython matplotlib jupyter
32 | 
33 | # Install PyTorch 1.0 Nightly and OpenCV (menpo channel). NOTE(review): CUDA is not used to pin cudatoolkit here - confirm intended
34 | RUN conda install -y pytorch-nightly -c pytorch \
35 |  && conda install -y opencv -c menpo \
36 |  && conda clean -ya
37 | 
38 | WORKDIR /root
39 | 
40 | USER root
41 | 
42 | RUN mkdir /notebooks
43 | 
44 | WORKDIR /notebooks
45 | 
46 | # Install TorchVision master
47 | RUN git clone https://github.com/pytorch/vision.git \
48 |  && cd vision \
49 |  && python setup.py install
50 | 
51 | # install pycocotools
52 | RUN git clone https://github.com/cocodataset/cocoapi.git \
53 |  && cd cocoapi/PythonAPI \
54 |  && python setup.py build_ext install
55 | 
56 | # install PyTorch Detection (maskrcnn-benchmark), cloned under /notebooks
57 | RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \
58 |  && cd maskrcnn-benchmark \
59 |  && python setup.py build develop
60 | 
61 | RUN jupyter notebook --generate-config
62 | 
63 | ENV CONFIG_PATH="/root/.jupyter/jupyter_notebook_config.py"
64 | # place the jupyter_notebook_config.py from the build context at CONFIG_PATH
65 | COPY "jupyter_notebook_config.py" ${CONFIG_PATH}
66 | # launch through `sh -c` so that ${CONFIG_PATH} is expanded when the container starts
67 | ENTRYPOINT ["sh", "-c", "jupyter notebook --allow-root -y --no-browser --ip=0.0.0.0 --config=${CONFIG_PATH}"]
68 | 


--------------------------------------------------------------------------------
/docker/docker-jupyter/jupyter_notebook_config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from IPython.lib import passwd
 3 | 
 4 | #c = c  # pylint:disable=undefined-variable
 5 | c = get_config()
 6 | c.NotebookApp.ip = '0.0.0.0'
 7 | c.NotebookApp.port = int(os.getenv('PORT', 8888))
 8 | c.NotebookApp.open_browser = False
 9 | 
10 | # sets a password if PASSWORD is set in the environment
11 | if 'PASSWORD' in os.environ:
12 |   password = os.environ['PASSWORD']
13 |   if password:
14 |     c.NotebookApp.password = passwd(password)
15 |   else:
16 |     c.NotebookApp.password = ''
17 |     c.NotebookApp.token = ''
18 |   del os.environ['PASSWORD']
19 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/config/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .defaults import _C as cfg
3 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/ROIAlign.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | 
 4 | #include "cpu/vision.h"
 5 | 
 6 | #ifdef WITH_CUDA
 7 | #include "cuda/vision.h"
 8 | #endif
 9 | 
10 | // Interface for Python
11 | at::Tensor ROIAlign_forward(const at::Tensor& input,
12 |                             const at::Tensor& rois,
13 |                             const float spatial_scale,
14 |                             const int pooled_height,
15 |                             const int pooled_width,
16 |                             const int sampling_ratio) {
17 |   if (input.type().is_cuda()) {
18 | #ifdef WITH_CUDA
19 |     return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
20 | #else
21 |     AT_ERROR("Not compiled with GPU support");
22 | #endif
23 |   }
24 |   return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
25 | }
26 | 
27 | at::Tensor ROIAlign_backward(const at::Tensor& grad,
28 |                              const at::Tensor& rois,
29 |                              const float spatial_scale,
30 |                              const int pooled_height,
31 |                              const int pooled_width,
32 |                              const int batch_size,
33 |                              const int channels,
34 |                              const int height,
35 |                              const int width,
36 |                              const int sampling_ratio) {
37 |   if (grad.type().is_cuda()) {
38 | #ifdef WITH_CUDA
39 |     return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio);
40 | #else
41 |     AT_ERROR("Not compiled with GPU support");
42 | #endif
43 |   }
44 |   AT_ERROR("Not implemented on the CPU");
45 | }
46 | 
47 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/ROIPool.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | 
 4 | #include "cpu/vision.h"
 5 | 
 6 | #ifdef WITH_CUDA
 7 | #include "cuda/vision.h"
 8 | #endif
 9 | 
10 | 
11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input,
12 |                                 const at::Tensor& rois,
13 |                                 const float spatial_scale,
14 |                                 const int pooled_height,
15 |                                 const int pooled_width) {
16 |   if (input.type().is_cuda()) {
17 | #ifdef WITH_CUDA
18 |     return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width);
19 | #else
20 |     AT_ERROR("Not compiled with GPU support");
21 | #endif
22 |   }
23 |   AT_ERROR("Not implemented on the CPU");
24 | }
25 | 
26 | at::Tensor ROIPool_backward(const at::Tensor& grad,
27 |                                  const at::Tensor& input,
28 |                                  const at::Tensor& rois,
29 |                                  const at::Tensor& argmax,
30 |                                  const float spatial_scale,
31 |                                  const int pooled_height,
32 |                                  const int pooled_width,
33 |                                  const int batch_size,
34 |                                  const int channels,
35 |                                  const int height,
36 |                                  const int width) {
37 |   if (grad.type().is_cuda()) {
38 | #ifdef WITH_CUDA
39 |     return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width);
40 | #else
41 |     AT_ERROR("Not compiled with GPU support");
42 | #endif
43 |   }
44 |   AT_ERROR("Not implemented on the CPU");
45 | }
46 | 
47 | 
48 | 
49 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "cpu/vision.h"
 4 | 
 5 | #ifdef WITH_CUDA
 6 | #include "cuda/vision.h"
 7 | #endif
 8 | 
 9 | // Interface for Python
10 | at::Tensor SigmoidFocalLoss_forward(
11 | 		const at::Tensor& logits,
12 |                 const at::Tensor& targets,
13 | 		const int num_classes, 
14 | 		const float gamma, 
15 | 		const float alpha) {
16 |   if (logits.type().is_cuda()) {
17 | #ifdef WITH_CUDA
18 |     return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha);
19 | #else
20 |     AT_ERROR("Not compiled with GPU support");
21 | #endif
22 |   }
23 |   AT_ERROR("Not implemented on the CPU");
24 | }
25 | 
26 | at::Tensor SigmoidFocalLoss_backward(
27 | 			     const at::Tensor& logits,
28 |                              const at::Tensor& targets,
29 | 			     const at::Tensor& d_losses,
30 | 			     const int num_classes,
31 | 			     const float gamma,
32 | 			     const float alpha) {
33 |   if (logits.type().is_cuda()) {
34 | #ifdef WITH_CUDA
35 |     return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha);
36 | #else
37 |     AT_ERROR("Not compiled with GPU support");
38 | #endif
39 |   }
40 |   AT_ERROR("Not implemented on the CPU");
41 | }
42 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include "cpu/vision.h"
 3 | 
 4 | 
 5 | template <typename scalar_t>
 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets,
 7 |                           const at::Tensor& scores,
 8 |                           const float threshold) {
 9 |   AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
10 |   AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
11 |   AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");
12 | 
13 |   if (dets.numel() == 0) {
14 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
15 |   }
16 | 
17 |   auto x1_t = dets.select(1, 0).contiguous();
18 |   auto y1_t = dets.select(1, 1).contiguous();
19 |   auto x2_t = dets.select(1, 2).contiguous();
20 |   auto y2_t = dets.select(1, 3).contiguous();
21 | 
22 |   at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
23 | 
24 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
25 | 
26 |   auto ndets = dets.size(0);
27 |   at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
28 | 
29 |   auto suppressed = suppressed_t.data<uint8_t>();
30 |   auto order = order_t.data<int64_t>();
31 |   auto x1 = x1_t.data<scalar_t>();
32 |   auto y1 = y1_t.data<scalar_t>();
33 |   auto x2 = x2_t.data<scalar_t>();
34 |   auto y2 = y2_t.data<scalar_t>();
35 |   auto areas = areas_t.data<scalar_t>();
36 | 
37 |   for (int64_t _i = 0; _i < ndets; _i++) {
38 |     auto i = order[_i];
39 |     if (suppressed[i] == 1)
40 |       continue;
41 |     auto ix1 = x1[i];
42 |     auto iy1 = y1[i];
43 |     auto ix2 = x2[i];
44 |     auto iy2 = y2[i];
45 |     auto iarea = areas[i];
46 | 
47 |     for (int64_t _j = _i + 1; _j < ndets; _j++) {
48 |       auto j = order[_j];
49 |       if (suppressed[j] == 1)
50 |         continue;
51 |       auto xx1 = std::max(ix1, x1[j]);
52 |       auto yy1 = std::max(iy1, y1[j]);
53 |       auto xx2 = std::min(ix2, x2[j]);
54 |       auto yy2 = std::min(iy2, y2[j]);
55 | 
56 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
57 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
58 |       auto inter = w * h;
59 |       auto ovr = inter / (iarea + areas[j] - inter);
60 |       if (ovr >= threshold)
61 |         suppressed[j] = 1;
62 |    }
63 |   }
64 |   return at::nonzero(suppressed_t == 0).squeeze(1);
65 | }
66 | 
67 | at::Tensor nms_cpu(const at::Tensor& dets,
68 |                const at::Tensor& scores,
69 |                const float threshold) {
70 |   at::Tensor result;
71 |   AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
72 |     result = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
73 |   });
74 |   return result;
75 | }
76 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/cpu/vision.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include <torch/extension.h>
 4 | 
 5 | 
// CPU implementation of the ROIAlign forward pass.
// Called by ROIAlign_forward (ROIAlign.h) when the input is not a CUDA tensor.
at::Tensor ROIAlign_forward_cpu(const at::Tensor& input,
                                const at::Tensor& rois,
                                const float spatial_scale,
                                const int pooled_height,
                                const int pooled_width,
                                const int sampling_ratio);


// CPU implementation of non-maximum suppression (defined in cpu/nms_cpu.cpp).
// Called by nms (nms.h) when `dets` is not a CUDA tensor.
at::Tensor nms_cpu(const at::Tensor& dets,
                   const at::Tensor& scores,
                   const float threshold);
17 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/deform_pool.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include "cpu/vision.h"
 4 | 
 5 | #ifdef WITH_CUDA
 6 | #include "cuda/vision.h"
 7 | #endif
 8 | 
 9 | 
10 | // Interface for Python
11 | void deform_psroi_pooling_forward(
12 |     at::Tensor input, 
13 |     at::Tensor bbox, 
14 |     at::Tensor trans, 
15 |     at::Tensor out,
16 |     at::Tensor top_count, 
17 |     const int no_trans, 
18 |     const float spatial_scale,
19 |     const int output_dim, 
20 |     const int group_size, 
21 |     const int pooled_size,
22 |     const int part_size, 
23 |     const int sample_per_part, 
24 |     const float trans_std)
25 | {
26 |   if (input.type().is_cuda()) {
27 | #ifdef WITH_CUDA
28 |     return deform_psroi_pooling_cuda_forward(
29 |         input, bbox, trans, out, top_count, 
30 |         no_trans, spatial_scale, output_dim, group_size,
31 |         pooled_size, part_size, sample_per_part, trans_std
32 |     );
33 | #else
34 |     AT_ERROR("Not compiled with GPU support");
35 | #endif
36 |   }
37 |   AT_ERROR("Not implemented on the CPU");
38 | }
39 | 
40 | 
41 | void deform_psroi_pooling_backward(
42 |     at::Tensor out_grad, 
43 |     at::Tensor input, 
44 |     at::Tensor bbox, 
45 |     at::Tensor trans,
46 |     at::Tensor top_count, 
47 |     at::Tensor input_grad, 
48 |     at::Tensor trans_grad,
49 |     const int no_trans, 
50 |     const float spatial_scale, 
51 |     const int output_dim,
52 |     const int group_size, 
53 |     const int pooled_size, 
54 |     const int part_size,
55 |     const int sample_per_part, 
56 |     const float trans_std) 
57 | {
58 |   if (input.type().is_cuda()) {
59 | #ifdef WITH_CUDA
60 |     return deform_psroi_pooling_cuda_backward(
61 |         out_grad, input, bbox, trans, top_count, input_grad, trans_grad,
62 |         no_trans, spatial_scale, output_dim, group_size, pooled_size, 
63 |         part_size, sample_per_part, trans_std
64 |     );
65 | #else
66 |     AT_ERROR("Not compiled with GPU support");
67 | #endif
68 |   }
69 |   AT_ERROR("Not implemented on the CPU");
70 | }
71 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/nms.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include "cpu/vision.h"
 4 | 
 5 | #ifdef WITH_CUDA
 6 | #include "cuda/vision.h"
 7 | #endif
 8 | 
 9 | 
10 | at::Tensor nms(const at::Tensor& dets,
11 |                const at::Tensor& scores,
12 |                const float threshold) {
13 | 
14 |   if (dets.type().is_cuda()) {
15 | #ifdef WITH_CUDA
16 |     // TODO raise error if not compiled with CUDA
17 |     if (dets.numel() == 0)
18 |       return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
19 |     auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
20 |     return nms_cuda(b, threshold);
21 | #else
22 |     AT_ERROR("Not compiled with GPU support");
23 | #endif
24 |   }
25 | 
26 |   at::Tensor result = nms_cpu(dets, scores, threshold);
27 |   return result;
28 | }
29 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/vision.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include "nms.h"
 3 | #include "ROIAlign.h"
 4 | #include "ROIPool.h"
 5 | #include "SigmoidFocalLoss.h"
 6 | #include "deform_conv.h"
 7 | #include "deform_pool.h"
 8 | 
// Python bindings for the extension module. Each m.def registers one of the
// C++ dispatch functions from the headers included above under the given
// Python-visible name, with the last argument used as its docstring.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  // NMS, ROI pooling/align and focal loss operators
  m.def("nms", &nms, "non-maximum suppression");
  m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
  m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
  m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward");
  m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward");
  m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward");
  m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward");
  // dcn-v2
  m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward");
  m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input");
  m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters");
  m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward");
  m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward");
  m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward");
  m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward");
  // deformable unfold operators
  m.def("deform_unfold_forward", &deform_unfold_forward, "deform unfold forward");
  m.def("deform_unfold_backward_input", &deform_unfold_backward_input, "deform_unfold_backward_input");
}


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .build import make_data_loader
3 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/collate_batch.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from maskrcnn_benchmark.structures.image_list import to_image_list
 3 | 
 4 | 
 5 | class BatchCollator(object):
 6 |     """
 7 |     From a list of samples from the dataset,
 8 |     returns the batched images and targets.
 9 |     This should be passed to the DataLoader
10 |     """
11 | 
12 |     def __init__(self, size_divisible=0):
13 |         self.size_divisible = size_divisible
14 | 
15 |     def __call__(self, batch):
16 |         transposed_batch = list(zip(*batch))
17 |         images = to_image_list(transposed_batch[0], self.size_divisible)
18 |         targets = transposed_batch[1]
19 |         img_ids = transposed_batch[2]
20 |         return images, targets, img_ids
21 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .coco import COCODataset
3 | from .voc import PascalVOCDataset
4 | from .concat_dataset import ConcatDataset
5 | 
6 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset"]
7 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/concat_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import bisect
 3 | 
 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
 5 | 
 6 | 
 7 | class ConcatDataset(_ConcatDataset):
 8 |     """
 9 |     Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra
10 |     method for querying the sizes of the image
11 |     """
12 | 
13 |     def get_idxs(self, idx):
14 |         dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
15 |         if dataset_idx == 0:
16 |             sample_idx = idx
17 |         else:
18 |             sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
19 |         return dataset_idx, sample_idx
20 | 
21 |     def get_img_info(self, idx):
22 |         dataset_idx, sample_idx = self.get_idxs(idx)
23 |         return self.datasets[dataset_idx].get_img_info(sample_idx)
24 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/__init__.py:
--------------------------------------------------------------------------------
 1 | from maskrcnn_benchmark.data import datasets
 2 | 
 3 | from .coco import coco_evaluation
 4 | from .voc import voc_evaluation
 5 | 
 6 | 
 7 | def evaluate(dataset, predictions, output_folder, **kwargs):
 8 |     """evaluate dataset using different methods based on dataset type.
 9 |     Args:
10 |         dataset: Dataset object
11 |         predictions(list[BoxList]): each item in the list represents the
12 |             prediction results for one image.
13 |         output_folder: output folder, to save evaluation files or results.
14 |         **kwargs: other args.
15 |     Returns:
16 |         evaluation result
17 |     """
18 |     args = dict(
19 |         dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs
20 |     )
21 |     if isinstance(dataset, datasets.COCODataset):
22 |         return coco_evaluation(**args)
23 |     elif isinstance(dataset, datasets.PascalVOCDataset):
24 |         return voc_evaluation(**args)
25 |     else:
26 |         dataset_name = dataset.__class__.__name__
27 |         raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name))
28 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py:
--------------------------------------------------------------------------------
 1 | from .coco_eval import do_coco_evaluation
 2 | 
 3 | 
 4 | def coco_evaluation(
 5 |     dataset,
 6 |     predictions,
 7 |     output_folder,
 8 |     box_only,
 9 |     iou_types,
10 |     expected_results,
11 |     expected_results_sigma_tol,
12 | ):
13 |     return do_coco_evaluation(
14 |         dataset=dataset,
15 |         predictions=predictions,
16 |         box_only=box_only,
17 |         output_folder=output_folder,
18 |         iou_types=iou_types,
19 |         expected_results=expected_results,
20 |         expected_results_sigma_tol=expected_results_sigma_tol,
21 |     )
22 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from .voc_eval import do_voc_evaluation
 4 | 
 5 | 
 6 | def voc_evaluation(dataset, predictions, output_folder, box_only, **_):
 7 |     logger = logging.getLogger("maskrcnn_benchmark.inference")
 8 |     if box_only:
 9 |         logger.warning("voc evaluation doesn't support box_only, ignored.")
10 |     logger.info("performing voc evaluation, ignored iou_types.")
11 |     return do_voc_evaluation(
12 |         dataset=dataset,
13 |         predictions=predictions,
14 |         output_folder=output_folder,
15 |         logger=logger,
16 |     )
17 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/list_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | """
 3 | Simple dataset class that wraps a list of path names
 4 | """
 5 | 
 6 | from PIL import Image
 7 | 
 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList
 9 | 
10 | 
class ListDataset(object):
    """
    Dataset over a list of image paths.

    Arguments:
        image_lists: sequence of image file paths.
        transforms (optional): callable applied as
            ``img, target = transforms(img, target)``.
    """

    def __init__(self, image_lists, transforms=None):
        self.image_lists = image_lists
        self.transforms = transforms

    def __getitem__(self, item):
        """Return ``(img, target)`` where target is a dummy full-image box."""
        # convert() returns a new image, so the file can be closed right away
        # instead of staying open until garbage collection.
        with Image.open(self.image_lists[item]) as raw:
            img = raw.convert("RGB")

        # dummy target
        w, h = img.size
        target = BoxList([[0, 0, w, h]], img.size, mode="xyxy")

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.image_lists)

    def get_img_info(self, item):
        """
        Return the image dimensions for the image, without
        loading and pre-processing it.

        Returns:
            dict with integer ``"height"`` and ``"width"`` entries.
        """
        # Only ``size`` is read, so no conversion or transform is applied.
        # NOTE(review): the height/width dict layout is assumed to be what
        # callers of get_img_info expect -- confirm against the other datasets.
        with Image.open(self.image_lists[item]) as img:
            width, height = img.size
        return {"height": height, "width": width}
37 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .distributed import DistributedSampler
3 | from .grouped_batch_sampler import GroupedBatchSampler
4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler
5 | 
6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"]
7 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/samplers/distributed.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed.
 3 | # FIXME remove this once c10d fixes the bug it has
 4 | import math
 5 | import torch
 6 | import torch.distributed as dist
 7 | from torch.utils.data.sampler import Sampler
 8 | 
 9 | 
class DistributedSampler(Sampler):
    """Sampler that restricts data loading to a subset of the dataset.
    It is especially useful in conjunction with
    :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
    process can pass a DistributedSampler instance as a DataLoader sampler,
    and load a subset of the original dataset that is exclusive to it.
    .. note::
        Dataset is assumed to be of constant size.
    Arguments:
        dataset: Dataset used for sampling.
        num_replicas (optional): Number of processes participating in
            distributed training. Defaults to the distributed world size.
        rank (optional): Rank of the current process within num_replicas.
            Defaults to the rank of the current process.
        shuffle (optional): If True, indices are shuffled with a generator
            seeded by the current epoch; otherwise they are kept in order.
    """

    def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
        if num_replicas is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            num_replicas = dist.get_world_size()
        if rank is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            rank = dist.get_rank()
        self.dataset = dataset
        self.num_replicas = num_replicas
        self.rank = rank
        self.epoch = 0
        # every replica receives the same number of samples
        self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
        self.total_size = self.num_samples * self.num_replicas
        self.shuffle = shuffle

    def __iter__(self):
        dataset_len = len(self.dataset)
        if self.shuffle:
            # deterministically shuffle based on epoch
            g = torch.Generator()
            g.manual_seed(self.epoch)
            indices = torch.randperm(dataset_len, generator=g).tolist()
        else:
            indices = list(range(dataset_len))

        # add extra samples to make it evenly divisible; when the padding is
        # longer than the index list itself (dataset smaller than the number
        # of missing slots), the list is repeated as often as needed
        padding_size = self.total_size - len(indices)
        if padding_size > 0:
            if padding_size <= len(indices):
                indices += indices[:padding_size]
            else:
                repeats = int(math.ceil(padding_size * 1.0 / len(indices)))
                indices += (indices * repeats)[:padding_size]
        assert len(indices) == self.total_size

        # subsample: this rank's contiguous slice
        offset = self.num_samples * self.rank
        indices = indices[offset : offset + self.num_samples]
        assert len(indices) == self.num_samples

        return iter(indices)

    def __len__(self):
        return self.num_samples

    def set_epoch(self, epoch):
        """Set the epoch used to seed the shuffle in ``__iter__``."""
        self.epoch = epoch
67 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from torch.utils.data.sampler import BatchSampler
 3 | 
 4 | 
 5 | class IterationBasedBatchSampler(BatchSampler):
 6 |     """
 7 |     Wraps a BatchSampler, resampling from it until
 8 |     a specified number of iterations have been sampled
 9 |     """
10 | 
11 |     def __init__(self, batch_sampler, num_iterations, start_iter=0):
12 |         self.batch_sampler = batch_sampler
13 |         self.num_iterations = num_iterations
14 |         self.start_iter = start_iter
15 | 
16 |     def __iter__(self):
17 |         iteration = self.start_iter
18 |         while iteration <= self.num_iterations:
19 |             # if the underlying sampler has a set_epoch method, like
20 |             # DistributedSampler, used for making each process see
21 |             # a different split of the dataset, then set it
22 |             if hasattr(self.batch_sampler.sampler, "set_epoch"):
23 |                 self.batch_sampler.sampler.set_epoch(iteration)
24 |             for batch in self.batch_sampler:
25 |                 iteration += 1
26 |                 if iteration > self.num_iterations:
27 |                     break
28 |                 yield batch
29 | 
30 |     def __len__(self):
31 |         return self.num_iterations
32 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .transforms import Compose
3 | from .transforms import Resize
4 | from .transforms import RandomHorizontalFlip
5 | from .transforms import ToTensor
6 | from .transforms import Normalize
7 | 
8 | from .build import build_transforms
9 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/transforms/build.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from . import transforms as T
 3 | 
 4 | 
 5 | def build_transforms(cfg, is_train=True):
 6 |     if is_train:
 7 |         min_size = cfg.INPUT.MIN_SIZE_TRAIN
 8 |         max_size = cfg.INPUT.MAX_SIZE_TRAIN
 9 |         flip_prob = 0.5  # cfg.INPUT.FLIP_PROB_TRAIN
10 |         brightness = cfg.INPUT.BRIGHTNESS
11 |         contrast = cfg.INPUT.CONTRAST
12 |         saturation = cfg.INPUT.SATURATION
13 |         hue = cfg.INPUT.HUE
14 |     else:
15 |         min_size = cfg.INPUT.MIN_SIZE_TEST
16 |         max_size = cfg.INPUT.MAX_SIZE_TEST
17 |         flip_prob = 0
18 |         brightness = 0.0
19 |         contrast = 0.0
20 |         saturation = 0.0
21 |         hue = 0.0
22 | 
23 |     to_bgr255 = cfg.INPUT.TO_BGR255
24 |     normalize_transform = T.Normalize(
25 |         mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255
26 |     )
27 |     color_jitter = T.ColorJitter(
28 |         brightness=brightness,
29 |         contrast=contrast,
30 |         saturation=saturation,
31 |         hue=hue,
32 |     )
33 | 
34 |     transform = T.Compose(
35 |         [
36 |             color_jitter,
37 |             T.Resize(min_size, max_size),
38 |             T.RandomHorizontalFlip(flip_prob),
39 |             T.ToTensor(),
40 |             normalize_transform,
41 |         ]
42 |     )
43 |     return transform
44 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | 
 4 | from .batch_norm import FrozenBatchNorm2d
 5 | from .misc import Conv2d
 6 | from .misc import DFConv2d
 7 | from .misc import ConvTranspose2d
 8 | from .misc import BatchNorm2d
 9 | from .misc import interpolate
10 | from .nms import nms
11 | from .roi_align import ROIAlign
12 | from .roi_align import roi_align
13 | from .roi_pool import ROIPool
14 | from .roi_pool import roi_pool
15 | from .smooth_l1_loss import smooth_l1_loss
16 | from .sigmoid_focal_loss import SigmoidFocalLoss
17 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv
18 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, ModulatedDeformConvPack
19 | from .dcn.deform_pool_func import deform_roi_pooling
20 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack
21 | from .dcn.deform_unfold_func import deform_unfold
22 | from .dcn.deform_unfold_module import DeformUnfold
23 | from .dynamic_weight import DynamicWeightsCat11, DynamicWeightsCat33, ReDynamicWeightsCat33, ReDynamicWeightsCat11
24 | 
25 | 
# Public names re-exported by ``maskrcnn_benchmark.layers``.
# Every entry must be bound in this module by one of the imports above;
# a name listed here but not bound makes ``from ... import *`` raise
# AttributeError.
__all__ = [
    "nms",
    "roi_align",
    "ROIAlign",
    "roi_pool",
    "ROIPool",
    "smooth_l1_loss",
    "Conv2d",
    "DFConv2d",
    "ConvTranspose2d",
    "interpolate",
    "BatchNorm2d",
    "FrozenBatchNorm2d",
    "SigmoidFocalLoss",
    'deform_conv',
    'modulated_deform_conv',
    'DeformConv',
    'ModulatedDeformConv',
    'ModulatedDeformConvPack',
    'deform_roi_pooling',
    'DeformRoIPooling',
    'DeformRoIPoolingPack',
    'ModulatedDeformRoIPoolingPack',
    'deform_unfold',
    'DeformUnfold',
    'DynamicWeightsCat11',
    'DynamicWeightsCat33',
    'ReDynamicWeightsCat33',
    'ReDynamicWeightsCat11',
    # 'DeformDGMN',  # not imported in this module; re-enable together with its import
    # 'GloReLocalModule',
]
58 | 
59 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import glob
 3 | import os.path
 4 | 
 5 | import torch
 6 | 
 7 | try:
 8 |     from torch.utils.cpp_extension import load as load_ext
 9 |     from torch.utils.cpp_extension import CUDA_HOME
10 | except ImportError:
11 |     raise ImportError("The cpp layer extensions requires PyTorch 0.4 or higher")
12 | 
13 | 
def _load_C_extensions():
    """JIT-build and load the extension sources found in the ``csrc`` directory.

    ``csrc`` is resolved relative to the parent of this file's directory.
    All ``*.cpp`` files directly under it and under ``csrc/cpu`` are always
    compiled; ``csrc/cuda/*.cu`` files are added, together with the
    ``-DWITH_CUDA`` flag, only when CUDA is available at runtime and
    ``CUDA_HOME`` is set.

    Returns:
        Whatever ``torch.utils.cpp_extension.load`` returns for the build.
    """
    package_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    csrc_dir = os.path.join(package_root, "csrc")

    top_level_sources = glob.glob(os.path.join(csrc_dir, "*.cpp"))
    cpu_sources = glob.glob(os.path.join(csrc_dir, "cpu", "*.cpp"))
    cuda_sources = glob.glob(os.path.join(csrc_dir, "cuda", "*.cu"))

    sources = top_level_sources + cpu_sources
    compile_flags = []
    if torch.cuda.is_available() and CUDA_HOME is not None:
        sources.extend(cuda_sources)
        compile_flags = ["-DWITH_CUDA"]

    # Kept from the original: re-anchor every path on csrc_dir.
    sources = [os.path.join(csrc_dir, path) for path in sources]

    return load_ext(
        "torchvision",
        sources,
        extra_cflags=compile_flags,
        extra_include_paths=[csrc_dir],
    )
37 | 
38 | 
# Runs the JIT build when this module is imported and keeps the loaded
# extension module in ``_C``.
_C = _load_C_extensions()
40 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/batch_norm.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | from torch import nn
 4 | 
 5 | 
 6 | class FrozenBatchNorm2d(nn.Module):
 7 |     """
 8 |     BatchNorm2d where the batch statistics and the affine parameters
 9 |     are fixed
10 |     """
11 | 
12 |     def __init__(self, n):
13 |         super(FrozenBatchNorm2d, self).__init__()
14 |         self.register_buffer("weight", torch.ones(n))
15 |         self.register_buffer("bias", torch.zeros(n))
16 |         self.register_buffer("running_mean", torch.zeros(n))
17 |         self.register_buffer("running_var", torch.ones(n))
18 | 
19 |     def forward(self, x):
20 |         # Cast all fixed parameters to half() if necessary
21 |         if x.dtype == torch.float16:
22 |             self.weight = self.weight.half()
23 |             self.bias = self.bias.half()
24 |             self.running_mean = self.running_mean.half()
25 |             self.running_var = self.running_var.half()
26 | 
27 |         scale = self.weight * self.running_var.rsqrt()
28 |         bias = self.bias - self.running_mean * scale
29 |         scale = scale.reshape(1, -1, 1, 1)
30 |         bias = bias.reshape(1, -1, 1, 1)
31 |         return x * scale + bias
32 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/dcn/__init__.py:
--------------------------------------------------------------------------------
1 | # 
2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn)
3 | # 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/dcn/deform_pool_func.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from torch.autograd.function import once_differentiable
 4 | 
 5 | from maskrcnn_benchmark import _C
 6 | 
 7 | 
class DeformRoIPoolingFunction(Function):
    """Autograd wrapper around the ``deform_psroi_pooling`` kernels in ``_C``.

    Only CUDA tensors are supported: both ``forward`` and ``backward`` raise
    ``NotImplementedError`` for non-CUDA inputs.
    """

    @staticmethod
    def forward(
        ctx,
        data,
        rois,
        offset,
        spatial_scale,
        out_size,
        out_channels,
        no_trans,
        group_size=1,
        part_size=None,
        sample_per_part=4,
        trans_std=.0
    ):
        """Run the forward kernel and return the pooled output.

        The output is allocated as ``(rois.shape[0], out_channels, out_size,
        out_size)`` with the dtype/device of ``data``. ``part_size`` falls
        back to ``out_size`` when it is ``None``. ``trans_std`` must lie in
        ``[0, 1]`` (checked with an assert). The exact meaning of the
        remaining scalar arguments is defined by the C extension and is not
        visible here.
        """
        # Stash the non-tensor configuration so backward can replay it.
        ctx.spatial_scale = spatial_scale
        ctx.out_size = out_size
        ctx.out_channels = out_channels
        ctx.no_trans = no_trans
        ctx.group_size = group_size
        ctx.part_size = out_size if part_size is None else part_size
        ctx.sample_per_part = sample_per_part
        ctx.trans_std = trans_std

        assert 0.0 <= ctx.trans_std <= 1.0
        if not data.is_cuda:
            raise NotImplementedError

        n = rois.shape[0]
        # Both buffers are allocated uninitialized and handed to the kernel,
        # which is presumably responsible for filling them.
        output = data.new_empty(n, out_channels, out_size, out_size)
        output_count = data.new_empty(n, out_channels, out_size, out_size)
        _C.deform_psroi_pooling_forward(
            data,
            rois,
            offset,
            output,
            output_count,
            ctx.no_trans,
            ctx.spatial_scale,
            ctx.out_channels,
            ctx.group_size,
            ctx.out_size,
            ctx.part_size,
            ctx.sample_per_part,
            ctx.trans_std
        )

        # Inputs are only saved when at least one of them requires grad.
        if data.requires_grad or rois.requires_grad or offset.requires_grad:
            ctx.save_for_backward(data, rois, offset)
        # output_count is kept on ctx directly and passed to the backward kernel.
        ctx.output_count = output_count

        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Compute gradients for ``data`` and ``offset`` via the backward kernel.

        ``rois`` gets no gradient (``None``), and neither do the eight
        non-tensor forward arguments.
        """
        if not grad_output.is_cuda:
            raise NotImplementedError

        data, rois, offset = ctx.saved_tensors
        output_count = ctx.output_count
        # Gradient buffers start at zero and are passed to the kernel, which
        # is presumably where they get filled in.
        grad_input = torch.zeros_like(data)
        grad_rois = None
        grad_offset = torch.zeros_like(offset)

        _C.deform_psroi_pooling_backward(
            grad_output,
            data,
            rois,
            offset,
            output_count,
            grad_input,
            grad_offset,
            ctx.no_trans,
            ctx.spatial_scale,
            ctx.out_channels,
            ctx.group_size,
            ctx.out_size,
            ctx.part_size,
            ctx.sample_per_part,
            ctx.trans_std
        )
        # One entry per forward argument after ctx (11 in total).
        return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None)


# Functional entry point: call as deform_roi_pooling(data, rois, offset, ...).
deform_roi_pooling = DeformRoIPoolingFunction.apply
96 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/dcn/deform_unfold_module.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | from torch.nn.modules.utils import _pair
 6 | 
 7 | from .deform_unfold_func import deform_unfold
 8 | 
 9 | 
class DeformUnfold(nn.Module):
    """Module wrapper that stores the hyper-parameters for ``deform_unfold``.

    The module has no learnable state; ``forward`` simply forwards the
    input, the offsets and the stored configuration to ``deform_unfold``.
    """

    def __init__(self,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 deformable_groups=1,
                 bias=False):
        """
        Arguments:
            kernel_size, stride, padding, dilation: int or pair; each is
                normalized to a 2-tuple with ``_pair``.
            deformable_groups (int): stored as given and passed to
                ``deform_unfold``.
            bias (bool): must be falsy; a truthy value trips an assert.
        """
        assert not bias
        super(DeformUnfold, self).__init__()

        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.deformable_groups = deformable_groups

    def forward(self, input, offset):
        """Call ``deform_unfold`` with the stored configuration."""
        return deform_unfold(input, offset, self.kernel_size, self.stride,
                             self.padding, self.dilation,
                             self.deformable_groups)

    def __repr__(self):
        # The last field closes the parenthesis; previously the string ended
        # with a dangling ", " and no closing ")".
        return "".join([
            "{}(".format(self.__class__.__name__),
            "kernel_size={}, ".format(self.kernel_size),
            "stride={}, ".format(self.stride),
            "dilation={}, ".format(self.dilation),
            "padding={}, ".format(self.padding),
            "deformable_groups={})".format(self.deformable_groups),
        ])
43 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/nms.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | # from ._utils import _C
 3 | from maskrcnn_benchmark import _C
 4 | 
 5 | from apex import amp
 6 | 
# The NMS kernel is only valid with fp32 inputs, so it is wrapped with
# amp.float_function to give AMP that hint.
nms = amp.float_function(_C.nms)

# nms.__doc__ = """
# This function performs Non-maximum suppression"""
12 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/roi_align.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | from torch import nn
 4 | from torch.autograd import Function
 5 | from torch.autograd.function import once_differentiable
 6 | from torch.nn.modules.utils import _pair
 7 | 
 8 | from maskrcnn_benchmark import _C
 9 | 
10 | from apex import amp
11 | 
class _ROIAlign(Function):
    """Autograd wrapper around the ``roi_align`` forward/backward kernels in ``_C``."""

    @staticmethod
    def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio):
        """Run the forward kernel.

        Arguments:
            input: feature tensor; its ``size()`` is recorded for backward.
            roi: regions of interest, saved for backward.
            output_size (int or pair): pooled output size; an int is
                expanded to ``(size, size)``.
            spatial_scale, sampling_ratio: forwarded to the kernel unchanged.
        """
        # Normalize first and use the normalized pair everywhere. Indexing
        # the raw argument would fail for an int output_size.
        output_size = _pair(output_size)
        ctx.save_for_backward(roi)
        ctx.output_size = output_size
        ctx.spatial_scale = spatial_scale
        ctx.sampling_ratio = sampling_ratio
        ctx.input_shape = input.size()
        output = _C.roi_align_forward(
            input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio
        )
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input``; the other four arguments get ``None``."""
        rois, = ctx.saved_tensors
        output_size = ctx.output_size
        spatial_scale = ctx.spatial_scale
        sampling_ratio = ctx.sampling_ratio
        bs, ch, h, w = ctx.input_shape
        grad_input = _C.roi_align_backward(
            grad_output,
            rois,
            spatial_scale,
            output_size[0],
            output_size[1],
            bs,
            ch,
            h,
            w,
            sampling_ratio,
        )
        return grad_input, None, None, None, None


# Functional entry point: roi_align(input, roi, output_size, spatial_scale, sampling_ratio).
roi_align = _ROIAlign.apply
49 | 
class ROIAlign(nn.Module):
    """Module form of ``roi_align`` that remembers its pooling configuration."""

    def __init__(self, output_size, spatial_scale, sampling_ratio):
        """Store the three pooling settings exactly as given."""
        super(ROIAlign, self).__init__()
        self.output_size = output_size
        self.spatial_scale = spatial_scale
        self.sampling_ratio = sampling_ratio

    @amp.float_function
    def forward(self, input, rois):
        """Pool ``input`` over ``rois`` using the stored settings."""
        pooled = roi_align(
            input,
            rois,
            self.output_size,
            self.spatial_scale,
            self.sampling_ratio,
        )
        return pooled

    def __repr__(self):
        pieces = [
            self.__class__.__name__,
            "(",
            "output_size=",
            str(self.output_size),
            ", spatial_scale=",
            str(self.spatial_scale),
            ", sampling_ratio=",
            str(self.sampling_ratio),
            ")",
        ]
        return "".join(pieces)
70 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/roi_pool.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | from torch import nn
 4 | from torch.autograd import Function
 5 | from torch.autograd.function import once_differentiable
 6 | from torch.nn.modules.utils import _pair
 7 | 
 8 | from maskrcnn_benchmark import _C
 9 | 
10 | from apex import amp
11 | 
class _ROIPool(Function):
    """Autograd wrapper around the ``roi_pool`` forward/backward kernels in ``_C``."""

    @staticmethod
    def forward(ctx, input, roi, output_size, spatial_scale):
        """Run the forward kernel.

        Arguments:
            input: feature tensor; saved for backward together with its size.
            roi: regions of interest, saved for backward.
            output_size (int or pair): pooled output size; an int is
                expanded to ``(size, size)``.
            spatial_scale: forwarded to the kernel unchanged.
        """
        # Normalize first and use the normalized pair everywhere. Indexing
        # the raw argument would fail for an int output_size.
        output_size = _pair(output_size)
        ctx.output_size = output_size
        ctx.spatial_scale = spatial_scale
        ctx.input_shape = input.size()
        output, argmax = _C.roi_pool_forward(
            input, roi, spatial_scale, output_size[0], output_size[1]
        )
        # argmax is produced by the forward kernel and handed back to the
        # backward kernel.
        ctx.save_for_backward(input, roi, argmax)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input``; the other three arguments get ``None``."""
        input, rois, argmax = ctx.saved_tensors
        output_size = ctx.output_size
        spatial_scale = ctx.spatial_scale
        bs, ch, h, w = ctx.input_shape
        grad_input = _C.roi_pool_backward(
            grad_output,
            input,
            rois,
            argmax,
            spatial_scale,
            output_size[0],
            output_size[1],
            bs,
            ch,
            h,
            w,
        )
        return grad_input, None, None, None


# Functional entry point: roi_pool(input, roi, output_size, spatial_scale).
roi_pool = _ROIPool.apply
48 | 
49 | 
class ROIPool(nn.Module):
    """Module form of ``roi_pool`` that remembers its pooling configuration."""

    def __init__(self, output_size, spatial_scale):
        """Store the two pooling settings exactly as given."""
        super(ROIPool, self).__init__()
        self.output_size = output_size
        self.spatial_scale = spatial_scale

    @amp.float_function
    def forward(self, input, rois):
        """Pool ``input`` over ``rois`` using the stored settings."""
        pooled = roi_pool(input, rois, self.output_size, self.spatial_scale)
        return pooled

    def __repr__(self):
        pieces = [
            self.__class__.__name__,
            "(",
            "output_size=",
            str(self.output_size),
            ", spatial_scale=",
            str(self.spatial_scale),
            ")",
        ]
        return "".join(pieces)
66 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | from torch.autograd import Function
 4 | from torch.autograd.function import once_differentiable
 5 | 
 6 | from maskrcnn_benchmark import _C
 7 | 
 8 | # TODO: Use JIT to replace CUDA implementation in the future.
 9 | class _SigmoidFocalLoss(Function):
10 |     @staticmethod
11 |     def forward(ctx, logits, targets, gamma, alpha):
12 |         ctx.save_for_backward(logits, targets)
13 |         num_classes = logits.shape[1]
14 |         ctx.num_classes = num_classes
15 |         ctx.gamma = gamma
16 |         ctx.alpha = alpha
17 | 
18 |         losses = _C.sigmoid_focalloss_forward(
19 |             logits, targets, num_classes, gamma, alpha
20 |         )
21 |         return losses
22 | 
23 |     @staticmethod
24 |     @once_differentiable
25 |     def backward(ctx, d_loss):
26 |         logits, targets = ctx.saved_tensors
27 |         num_classes = ctx.num_classes
28 |         gamma = ctx.gamma
29 |         alpha = ctx.alpha
30 |         d_loss = d_loss.contiguous()
31 |         d_logits = _C.sigmoid_focalloss_backward(
32 |             logits, targets, d_loss, num_classes, gamma, alpha
33 |         )
34 |         return d_logits, None, None, None, None
35 | 
36 | 
37 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply
38 | 
39 | 
def _first_element_or_self(value):
    """Return ``value[0]`` when ``value`` is indexable, otherwise ``value``."""
    try:
        return value[0]
    except (TypeError, IndexError):
        # Plain Python numbers raise TypeError; 0-dim tensors raise IndexError.
        return value


def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha):
    """Pure-PyTorch sigmoid focal loss, returned per element (not reduced).

    Arguments:
        logits: tensor whose second dimension indexes the classes.
        targets: tensor of class labels. A label ``k >= 1`` marks column
            ``k - 1`` as the positive class, ``0`` marks every column as
            negative, and negative labels contribute zero loss.
        gamma: focusing exponent, as a scalar or an indexable holding it
            in position 0 (list, tuple, 1-element tensor).
        alpha: weight of the positive term (negatives use ``1 - alpha``),
            accepted in the same forms as ``gamma``.

    Returns:
        Tensor with the same shape as ``logits``.
    """
    num_classes = logits.shape[1]
    # Scalars are what SigmoidFocalLoss passes; indexables are still
    # accepted for backward compatibility.
    gamma = _first_element_or_self(gamma)
    alpha = _first_element_or_self(alpha)
    dtype = targets.dtype
    device = targets.device
    # Labels are 1-based, so column j corresponds to label j + 1.
    class_range = torch.arange(1, num_classes + 1, dtype=dtype, device=device).unsqueeze(0)

    t = targets.unsqueeze(1)
    p = torch.sigmoid(logits)
    term1 = (1 - p) ** gamma * torch.log(p)
    term2 = p ** gamma * torch.log(1 - p)

    positive_mask = (t == class_range).float()
    negative_mask = ((t != class_range) * (t >= 0)).float()
    return -positive_mask * term1 * alpha - negative_mask * term2 * (1 - alpha)
53 | 
54 | 
class SigmoidFocalLoss(nn.Module):
    """Sigmoid focal loss summed over all elements.

    Dispatches to the CUDA implementation for CUDA logits and to the
    pure-PyTorch implementation otherwise.
    """

    def __init__(self, gamma, alpha):
        """Store ``gamma`` and ``alpha``; both are passed to the loss function as-is."""
        super(SigmoidFocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, logits, targets):
        """Return the scalar sum of the per-element focal loss."""
        if logits.is_cuda:
            loss_func = sigmoid_focal_loss_cuda
        else:
            loss_func = sigmoid_focal_loss_cpu

        loss = loss_func(logits, targets, self.gamma, self.alpha)
        return loss.sum()

    def __repr__(self):
        tmpstr = self.__class__.__name__ + "("
        tmpstr += "gamma=" + str(self.gamma)
        tmpstr += ", alpha=" + str(self.alpha)
        tmpstr += ")"
        return tmpstr
77 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/smooth_l1_loss.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | 
 4 | 
 5 | # TODO maybe push this to nn?
 6 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
 7 |     """
 8 |     very similar to the smooth_l1_loss from pytorch, but with
 9 |     the extra beta parameter
10 |     """
11 |     n = torch.abs(input - target)
12 |     cond = n < beta
13 |     loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
14 |     if size_average:
15 |         return loss.mean()
16 |     return loss.sum()
17 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/modeling/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .backbone import build_backbone
3 | from . import fbnet
4 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/backbone/backbone.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from collections import OrderedDict
 3 | 
 4 | from torch import nn
 5 | 
 6 | from maskrcnn_benchmark.modeling import registry
 7 | from maskrcnn_benchmark.modeling.make_layers import conv_with_kaiming_uniform
 8 | from . import fpn as fpn_module
 9 | from . import resnet
10 | 
11 | 
@registry.BACKBONES.register("R-50-C4")
@registry.BACKBONES.register("R-50-C5")
@registry.BACKBONES.register("R-101-C4")
@registry.BACKBONES.register("R-101-C5")
def build_resnet_backbone(cfg):
    """Build a plain ResNet backbone (no FPN).

    The ResNet is wrapped in an ``nn.Sequential`` under the key ``"body"``,
    and the wrapper is tagged with ``out_channels`` taken from
    ``cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS``.
    """
    stages = OrderedDict()
    stages["body"] = resnet.ResNet(cfg)

    backbone = nn.Sequential(stages)
    backbone.out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
    return backbone
21 | 
22 | 
@registry.BACKBONES.register("R-50-FPN")
@registry.BACKBONES.register("R-101-FPN")
@registry.BACKBONES.register("R-152-FPN")
def build_resnet_fpn_backbone(cfg):
    """Build a ResNet followed by an FPN.

    The FPN receives four input widths (1x, 2x, 4x and 8x
    ``cfg.MODEL.RESNETS.RES2_OUT_CHANNELS``) and uses ``LastLevelMaxPool``
    as its top block. The returned ``nn.Sequential`` has the sub-modules
    ``"body"`` and ``"fpn"`` and carries an ``out_channels`` attribute.
    """
    resnet_cfg = cfg.MODEL.RESNETS
    body = resnet.ResNet(cfg)

    stage2_width = resnet_cfg.RES2_OUT_CHANNELS
    out_channels = resnet_cfg.BACKBONE_OUT_CHANNELS
    stage_widths = [stage2_width * factor for factor in (1, 2, 4, 8)]

    conv_block = conv_with_kaiming_uniform(
        cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU
    )
    top_blocks = fpn_module.LastLevelMaxPool()
    fpn = fpn_module.FPN(
        in_channels_list=stage_widths,
        out_channels=out_channels,
        conv_block=conv_block,
        top_blocks=top_blocks,
    )

    stages = OrderedDict()
    stages["body"] = body
    stages["fpn"] = fpn
    backbone = nn.Sequential(stages)
    backbone.out_channels = out_channels
    return backbone
46 | 
47 | 
@registry.BACKBONES.register("R-50-FPN-RETINANET")
@registry.BACKBONES.register("R-101-FPN-RETINANET")
def build_resnet_fpn_p3p7_backbone(cfg):
    """Build a ResNet + FPN backbone with a ``LastLevelP6P7`` top block.

    The first FPN input width is 0; the remaining three are 2x, 4x and 8x
    ``cfg.MODEL.RESNETS.RES2_OUT_CHANNELS``. The top block takes the 8x
    width as input when ``cfg.MODEL.RETINANET.USE_C5`` is set, otherwise
    the FPN output width.
    """
    resnet_cfg = cfg.MODEL.RESNETS
    body = resnet.ResNet(cfg)

    stage2_width = resnet_cfg.RES2_OUT_CHANNELS
    out_channels = resnet_cfg.BACKBONE_OUT_CHANNELS
    if cfg.MODEL.RETINANET.USE_C5:
        p6p7_in_channels = stage2_width * 8
    else:
        p6p7_in_channels = out_channels

    stage_widths = [0] + [stage2_width * factor for factor in (2, 4, 8)]

    conv_block = conv_with_kaiming_uniform(
        cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU
    )
    top_blocks = fpn_module.LastLevelP6P7(p6p7_in_channels, out_channels)
    fpn = fpn_module.FPN(
        in_channels_list=stage_widths,
        out_channels=out_channels,
        conv_block=conv_block,
        top_blocks=top_blocks,
    )

    stages = OrderedDict()
    stages["body"] = body
    stages["fpn"] = fpn
    backbone = nn.Sequential(stages)
    backbone.out_channels = out_channels
    return backbone
72 | 
73 | 
def build_backbone(cfg):
    """Look up ``cfg.MODEL.BACKBONE.CONV_BODY`` in the registry and build it.

    An unregistered name trips an assertion whose message includes the
    offending value.
    """
    conv_body = cfg.MODEL.BACKBONE.CONV_BODY
    assert conv_body in registry.BACKBONES, \
        "cfg.MODEL.BACKBONE.CONV_BODY: {} are not registered in registry".format(
            conv_body
        )
    builder = registry.BACKBONES[conv_body]
    return builder(cfg)
80 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | 
 4 | 
 5 | class BalancedPositiveNegativeSampler(object):
 6 |     """
 7 |     This class samples batches, ensuring that they contain a fixed proportion of positives
 8 |     """
 9 | 
10 |     def __init__(self, batch_size_per_image, positive_fraction):
11 |         """
12 |         Arguments:
13 |             batch_size_per_image (int): number of elements to be selected per image
14 |             positive_fraction (float): percentace of positive elements per batch
15 |         """
16 |         self.batch_size_per_image = batch_size_per_image
17 |         self.positive_fraction = positive_fraction
18 | 
19 |     def __call__(self, matched_idxs):
20 |         """
21 |         Arguments:
22 |             matched idxs: list of tensors containing -1, 0 or positive values.
23 |                 Each tensor corresponds to a specific image.
24 |                 -1 values are ignored, 0 are considered as negatives and > 0 as
25 |                 positives.
26 | 
27 |         Returns:
28 |             pos_idx (list[tensor])
29 |             neg_idx (list[tensor])
30 | 
31 |         Returns two lists of binary masks for each image.
32 |         The first list contains the positive elements that were selected,
33 |         and the second list the negative example.
34 |         """
35 |         pos_idx = []
36 |         neg_idx = []
37 |         for matched_idxs_per_image in matched_idxs:
38 |             positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1)
39 |             negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1)
40 | 
41 |             num_pos = int(self.batch_size_per_image * self.positive_fraction)
42 |             # protect against not enough positive examples
43 |             num_pos = min(positive.numel(), num_pos)
44 |             num_neg = self.batch_size_per_image - num_pos
45 |             # protect against not enough negative examples
46 |             num_neg = min(negative.numel(), num_neg)
47 | 
48 |             # randomly select positive and negative examples
49 |             perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]
50 |             perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]
51 | 
52 |             pos_idx_per_image = positive[perm1]
53 |             neg_idx_per_image = negative[perm2]
54 | 
55 |             # create binary mask from indices
56 |             pos_idx_per_image_mask = torch.zeros_like(
57 |                 matched_idxs_per_image, dtype=torch.uint8
58 |             )
59 |             neg_idx_per_image_mask = torch.zeros_like(
60 |                 matched_idxs_per_image, dtype=torch.uint8
61 |             )
62 |             pos_idx_per_image_mask[pos_idx_per_image] = 1
63 |             neg_idx_per_image_mask[neg_idx_per_image] = 1
64 | 
65 |             pos_idx.append(pos_idx_per_image_mask)
66 |             neg_idx.append(neg_idx_per_image_mask)
67 | 
68 |         return pos_idx, neg_idx
69 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/detector/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .detectors import build_detection_model
3 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/detector/detectors.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from .generalized_rcnn import GeneralizedRCNN
 3 | 
 4 | 
 5 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN}
 6 | 
 7 | 
 8 | def build_detection_model(cfg):
 9 |     meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE]
10 |     return meta_arch(cfg)
11 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/detector/generalized_rcnn.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | """
 3 | Implements the Generalized R-CNN framework
 4 | """
 5 | 
 6 | import torch
 7 | from torch import nn
 8 | 
 9 | from maskrcnn_benchmark.structures.image_list import to_image_list
10 | 
11 | from ..backbone import build_backbone
12 | from ..rpn.rpn import build_rpn
13 | from ..roi_heads.roi_heads import build_roi_heads
14 | 
15 | 
class GeneralizedRCNN(nn.Module):
    """
    Generalized R-CNN detector, assembled from three parts:
    - backbone: turns the image batch into features
    - rpn: produces proposals (and proposal losses) from the features
    - roi_heads: turns features + proposals into detections / masks;
        may be empty for RPN-only models
    """

    def __init__(self, cfg):
        super(GeneralizedRCNN, self).__init__()

        self.backbone = build_backbone(cfg)
        backbone_channels = self.backbone.out_channels
        self.rpn = build_rpn(cfg, backbone_channels)
        self.roi_heads = build_roi_heads(cfg, backbone_channels)

    def forward(self, images, targets=None):
        """
        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                In training mode this is a dict of losses combining the
                detector and proposal losses. Otherwise it is the output of
                the roi heads, or the raw proposals when there are none.

        Raises:
            ValueError: in training mode when ``targets`` is not given.
        """
        if targets is None and self.training:
            raise ValueError("In training mode, targets should be passed")

        image_list = to_image_list(images)
        features = self.backbone(image_list.tensors)
        proposals, proposal_losses = self.rpn(image_list, features, targets)

        if not self.roi_heads:
            # RPN-only models don't have roi_heads: proposals are the result
            result = proposals
            detector_losses = {}
        else:
            _, result, detector_losses = self.roi_heads(features, proposals, targets)

        if not self.training:
            return result

        # Detector losses first, then proposal losses (which win on key clashes).
        losses = dict(detector_losses)
        losses.update(proposal_losses)
        return losses
66 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | from maskrcnn_benchmark.utils.registry import Registry
 4 | 
# Module-level registries, one per kind of pluggable component. Classes are
# added under a string name via `.register(...)` and fetched by indexing the
# registry with the name read from the config (see the make_roi_* factories).
# NOTE(review): BACKBONES and RPN_HEADS are presumably used the same way by
# the backbone / rpn packages -- confirm against those modules.
BACKBONES = Registry()
RPN_HEADS = Registry()
# Box head components
ROI_BOX_FEATURE_EXTRACTORS = Registry()
ROI_BOX_PREDICTOR = Registry()
# Keypoint head components
ROI_KEYPOINT_FEATURE_EXTRACTORS = Registry()
ROI_KEYPOINT_PREDICTOR = Registry()
# Mask head components
ROI_MASK_FEATURE_EXTRACTORS = Registry()
ROI_MASK_PREDICTOR = Registry()
13 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/modeling/roi_heads/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | from torch import nn
 4 | 
 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor
 6 | from .roi_box_predictors import make_roi_box_predictor
 7 | from .inference import make_roi_box_post_processor
 8 | from .loss import make_roi_box_loss_evaluator
 9 | 
10 | 
class ROIBoxHead(torch.nn.Module):
    """
    Box head made of a feature extractor and a predictor, plus a loss
    evaluator (used in training) and a post-processor (used otherwise).
    """

    def __init__(self, cfg, in_channels):
        super(ROIBoxHead, self).__init__()
        extractor = make_roi_box_feature_extractor(cfg, in_channels)
        self.feature_extractor = extractor
        # the predictor consumes whatever channel count the extractor emits
        self.predictor = make_roi_box_predictor(cfg, extractor.out_channels)
        self.post_processor = make_roi_box_post_processor(cfg)
        self.loss_evaluator = make_roi_box_loss_evaluator(cfg)

    def forward(self, features, proposals, targets=None):
        """
        Arguments:
            features (list[Tensor]): feature-maps from possibly several levels
            proposals (list[BoxList]): proposal boxes
            targets (list[BoxList], optional): the ground-truth targets.

        Returns:
            x (Tensor): the output of the feature extractor
            proposals (list[BoxList]): in training mode, the subsampled
                proposals; otherwise the output of the post-processor
            losses (dict[Tensor]): in training mode, `loss_classifier` and
                `loss_box_reg`; otherwise an empty dict
        """
        if self.training:
            # Faster R-CNN trains on a subsample of the proposals with a fixed
            # positive / negative ratio; no gradient flows through the sampling
            with torch.no_grad():
                proposals = self.loss_evaluator.subsample(proposals, targets)

        # pooled + transformed per-proposal features
        x = self.feature_extractor(features, proposals)
        # per-proposal class scores and box deltas
        class_logits, box_regression = self.predictor(x)

        if self.training:
            loss_classifier, loss_box_reg = self.loss_evaluator(
                [class_logits], [box_regression]
            )
            losses = dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg)
            return x, proposals, losses

        result = self.post_processor((class_logits, box_regression), proposals)
        return x, result, {}
63 | 
64 | 
def build_roi_box_head(cfg, in_channels):
    """
    Factory for the box head.

    Always builds a ROIBoxHead from `cfg` and `in_channels`; a different head
    class would have to be registered and selected through the config.
    """
    box_head = ROIBoxHead(cfg, in_channels)
    return box_head
72 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from maskrcnn_benchmark.modeling import registry
 3 | from torch import nn
 4 | 
 5 | 
 6 | @registry.ROI_BOX_PREDICTOR.register("FastRCNNPredictor")
 7 | class FastRCNNPredictor(nn.Module):
 8 |     def __init__(self, config, in_channels):
 9 |         super(FastRCNNPredictor, self).__init__()
10 |         assert in_channels is not None
11 | 
12 |         num_inputs = in_channels
13 | 
14 |         num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES
15 |         self.avgpool = nn.AdaptiveAvgPool2d(1)
16 |         self.cls_score = nn.Linear(num_inputs, num_classes)
17 |         num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes
18 |         self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4)
19 | 
20 |         nn.init.normal_(self.cls_score.weight, mean=0, std=0.01)
21 |         nn.init.constant_(self.cls_score.bias, 0)
22 | 
23 |         nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001)
24 |         nn.init.constant_(self.bbox_pred.bias, 0)
25 | 
26 |     def forward(self, x):
27 |         x = self.avgpool(x)
28 |         x = x.view(x.size(0), -1)
29 |         cls_logit = self.cls_score(x)
30 |         bbox_pred = self.bbox_pred(x)
31 |         return cls_logit, bbox_pred
32 | 
33 | 
@registry.ROI_BOX_PREDICTOR.register("FPNPredictor")
class FPNPredictor(nn.Module):
    """
    Box predictor made of two linear layers (class scores and box
    regression) applied to an already-flattened representation.
    """

    def __init__(self, cfg, in_channels):
        super(FPNPredictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        # class-agnostic regression uses 2 box sets instead of one per class
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            num_bbox_reg_classes = 2
        else:
            num_bbox_reg_classes = num_classes

        self.cls_score = nn.Linear(in_channels, num_classes)
        self.bbox_pred = nn.Linear(in_channels, num_bbox_reg_classes * 4)

        # normal weights (smaller std for the regressor), zero biases
        for layer, std in ((self.cls_score, 0.01), (self.bbox_pred, 0.001)):
            nn.init.normal_(layer.weight, std=std)
            nn.init.constant_(layer.bias, 0)

    def forward(self, x):
        """Return (class scores, box deltas); a 4-D input must be N x C x 1 x 1."""
        if x.ndimension() == 4:
            assert list(x.shape[2:]) == [1, 1]
            x = x.view(x.size(0), -1)
        return self.cls_score(x), self.bbox_pred(x)
58 | 
59 | 
def make_roi_box_predictor(cfg, in_channels):
    """Build the box predictor registered under cfg.MODEL.ROI_BOX_HEAD.PREDICTOR."""
    predictor_name = cfg.MODEL.ROI_BOX_HEAD.PREDICTOR
    predictor_cls = registry.ROI_BOX_PREDICTOR[predictor_name]
    return predictor_cls(cfg, in_channels)
63 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor
 4 | from .roi_keypoint_predictors import make_roi_keypoint_predictor
 5 | from .inference import make_roi_keypoint_post_processor
 6 | from .loss import make_roi_keypoint_loss_evaluator
 7 | 
 8 | 
 9 | class ROIKeypointHead(torch.nn.Module):
10 |     def __init__(self, cfg, in_channels):
11 |         super(ROIKeypointHead, self).__init__()
12 |         self.cfg = cfg.clone()
13 |         self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels)
14 |         self.predictor = make_roi_keypoint_predictor(
15 |             cfg, self.feature_extractor.out_channels)
16 |         self.post_processor = make_roi_keypoint_post_processor(cfg)
17 |         self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg)
18 | 
19 |     def forward(self, features, proposals, targets=None):
20 |         """
21 |         Arguments:
22 |             features (list[Tensor]): feature-maps from possibly several levels
23 |             proposals (list[BoxList]): proposal boxes
24 |             targets (list[BoxList], optional): the ground-truth targets.
25 | 
26 |         Returns:
27 |             x (Tensor): the result of the feature extractor
28 |             proposals (list[BoxList]): during training, the original proposals
29 |                 are returned. During testing, the predicted boxlists are returned
30 |                 with the `mask` field set
31 |             losses (dict[Tensor]): During training, returns the losses for the
32 |                 head. During testing, returns an empty dict.
33 |         """
34 |         if self.training:
35 |             with torch.no_grad():
36 |                 proposals = self.loss_evaluator.subsample(proposals, targets)
37 | 
38 |         x = self.feature_extractor(features, proposals)
39 |         kp_logits = self.predictor(x)
40 | 
41 |         if not self.training:
42 |             result = self.post_processor(kp_logits, proposals)
43 |             return x, result, {}
44 | 
45 |         loss_kp = self.loss_evaluator(proposals, kp_logits)
46 | 
47 |         return x, proposals, dict(loss_kp=loss_kp)
48 | 
49 | 
def build_roi_keypoint_head(cfg, in_channels):
    """Factory for the keypoint head; always builds a ROIKeypointHead."""
    keypoint_head = ROIKeypointHead(cfg, in_channels)
    return keypoint_head
52 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py:
--------------------------------------------------------------------------------
 1 | from torch import nn
 2 | from torch.nn import functional as F
 3 | 
 4 | from maskrcnn_benchmark.modeling import registry
 5 | from maskrcnn_benchmark.modeling.poolers import Pooler
 6 | 
 7 | from maskrcnn_benchmark.layers import Conv2d
 8 | 
 9 | 
@registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor")
class KeypointRCNNFeatureExtractor(nn.Module):
    """
    Keypoint-head feature extractor: a Pooler followed by a stack of 3x3
    convolutions, each followed by a ReLU.

    `out_channels` holds the channel count of the tensor returned by
    forward(): the width of the last conv layer, or `in_channels` when
    CONV_LAYERS is empty.
    """

    def __init__(self, cfg, in_channels):
        """
        Arguments:
            cfg: config node; POOLER_RESOLUTION, POOLER_SCALES,
                POOLER_SAMPLING_RATIO and CONV_LAYERS are read from
                cfg.MODEL.ROI_KEYPOINT_HEAD
            in_channels (int): number of channels fed to the first conv layer
        """
        super(KeypointRCNNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS
        next_feature = in_channels
        # names of the conv sub-modules, in application order
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "conv_fcn{}".format(layer_idx)
            module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1)
            nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
            nn.init.constant_(module.bias, 0)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        # `next_feature` tracks the running output width; unlike the loop
        # variable it is also defined when CONV_LAYERS is empty
        self.out_channels = next_feature

    def forward(self, x, proposals):
        """Pool `x` over `proposals`, then apply each conv layer followed by ReLU."""
        x = self.pooler(x, proposals)
        for layer_name in self.blocks:
            x = F.relu(getattr(self, layer_name)(x))
        return x
44 | 
45 | 
def make_roi_keypoint_feature_extractor(cfg, in_channels):
    """
    Build the keypoint feature extractor registered under
    cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR.
    """
    extractor_name = cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR
    extractor_cls = registry.ROI_KEYPOINT_FEATURE_EXTRACTORS[extractor_name]
    return extractor_cls(cfg, in_channels)
51 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py:
--------------------------------------------------------------------------------
 1 | from torch import nn
 2 | 
 3 | from maskrcnn_benchmark import layers
 4 | from maskrcnn_benchmark.modeling import registry
 5 | 
 6 | 
 7 | @registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor")
 8 | class KeypointRCNNPredictor(nn.Module):
 9 |     def __init__(self, cfg, in_channels):
10 |         super(KeypointRCNNPredictor, self).__init__()
11 |         input_features = in_channels
12 |         num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES
13 |         deconv_kernel = 4
14 |         self.kps_score_lowres = layers.ConvTranspose2d(
15 |             input_features,
16 |             num_keypoints,
17 |             deconv_kernel,
18 |             stride=2,
19 |             padding=deconv_kernel // 2 - 1,
20 |         )
21 |         nn.init.kaiming_normal_(
22 |             self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu"
23 |         )
24 |         nn.init.constant_(self.kps_score_lowres.bias, 0)
25 |         self.up_scale = 2
26 |         self.out_channels = num_keypoints
27 | 
28 |     def forward(self, x):
29 |         x = self.kps_score_lowres(x)
30 |         x = layers.interpolate(
31 |             x, scale_factor=self.up_scale, mode="bilinear", align_corners=False
32 |         )
33 |         return x
34 | 
35 | 
def make_roi_keypoint_predictor(cfg, in_channels):
    """Build the keypoint predictor registered under cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR."""
    predictor_name = cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR
    predictor_cls = registry.ROI_KEYPOINT_PREDICTOR[predictor_name]
    return predictor_cls(cfg, in_channels)
39 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from torch import nn
 3 | from torch.nn import functional as F
 4 | 
 5 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor
 6 | from maskrcnn_benchmark.modeling import registry
 7 | from maskrcnn_benchmark.modeling.poolers import Pooler
 8 | from maskrcnn_benchmark.modeling.make_layers import make_conv3x3
 9 | 
10 | 
# Make the box head's ResNet50Conv5ROIFeatureExtractor (imported above from
# ..box_head) selectable as a mask feature extractor too, under the same name.
registry.ROI_MASK_FEATURE_EXTRACTORS.register(
    "ResNet50Conv5ROIFeatureExtractor", ResNet50Conv5ROIFeatureExtractor
)
14 | 
15 | 
@registry.ROI_MASK_FEATURE_EXTRACTORS.register("MaskRCNNFPNFeatureExtractor")
class MaskRCNNFPNFeatureExtractor(nn.Module):
    """
    Mask-head feature extractor: a Pooler followed by a stack of 3x3
    convolutions (built with make_conv3x3), each followed by a ReLU.

    `out_channels` holds the channel count of the tensor returned by
    forward(): the width of the last conv layer, or `in_channels` when
    CONV_LAYERS is empty.
    """

    def __init__(self, cfg, in_channels):
        """
        Arguments:
            cfg: config node; POOLER_RESOLUTION, POOLER_SCALES,
                POOLER_SAMPLING_RATIO, USE_GN, CONV_LAYERS and DILATION are
                read from cfg.MODEL.ROI_MASK_HEAD
            in_channels (int): number of channels fed to the first conv layer
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

        next_feature = in_channels
        # names of the conv sub-modules, in application order
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "mask_fcn{}".format(layer_idx)
            module = make_conv3x3(
                next_feature, layer_features,
                dilation=dilation, stride=1, use_gn=use_gn
            )
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        # `next_feature` tracks the running output width; unlike the loop
        # variable it is also defined when CONV_LAYERS is empty
        self.out_channels = next_feature

    def forward(self, x, proposals):
        """Pool `x` over `proposals`, then apply each conv layer followed by ReLU."""
        x = self.pooler(x, proposals)

        for layer_name in self.blocks:
            x = F.relu(getattr(self, layer_name)(x))

        return x
66 | 
67 | 
def make_roi_mask_feature_extractor(cfg, in_channels):
    """
    Build the mask feature extractor registered under
    cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR.
    """
    extractor_name = cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR
    extractor_cls = registry.ROI_MASK_FEATURE_EXTRACTORS[extractor_name]
    return extractor_cls(cfg, in_channels)
73 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from torch import nn
 3 | from torch.nn import functional as F
 4 | 
 5 | from maskrcnn_benchmark.layers import Conv2d
 6 | from maskrcnn_benchmark.layers import ConvTranspose2d
 7 | from maskrcnn_benchmark.modeling import registry
 8 | 
 9 | 
@registry.ROI_MASK_PREDICTOR.register("MaskRCNNC4Predictor")
class MaskRCNNC4Predictor(nn.Module):
    """
    Mask predictor: a 2x2 stride-2 transposed convolution with ReLU, followed
    by a 1x1 convolution producing one logit map per class.
    """

    def __init__(self, cfg, in_channels):
        super(MaskRCNNC4Predictor, self).__init__()
        # the class count is taken from the box head config
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        self.conv5_mask = ConvTranspose2d(in_channels, dim_reduced, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")

    def forward(self, x):
        """Return the per-class mask logits for the input features."""
        upsampled = F.relu(self.conv5_mask(x))
        return self.mask_fcn_logits(upsampled)
32 | 
33 | 
@registry.ROI_MASK_PREDICTOR.register("MaskRCNNConv1x1Predictor")
class MaskRCNNConv1x1Predictor(nn.Module):
    """
    Mask predictor consisting of a single 1x1 convolution producing one logit
    map per class.
    """

    def __init__(self, cfg, in_channels):
        super(MaskRCNNConv1x1Predictor, self).__init__()
        # the class count is taken from the box head config
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES

        self.mask_fcn_logits = Conv2d(in_channels, num_classes, 1, 1, 0)

        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")

    def forward(self, x):
        """Return the per-class mask logits for the input features."""
        logits = self.mask_fcn_logits(x)
        return logits
53 | 
54 | 
def make_roi_mask_predictor(cfg, in_channels):
    """Build the mask predictor registered under cfg.MODEL.ROI_MASK_HEAD.PREDICTOR."""
    predictor_name = cfg.MODEL.ROI_MASK_HEAD.PREDICTOR
    predictor_cls = registry.ROI_MASK_PREDICTOR[predictor_name]
    return predictor_cls(cfg, in_channels)
58 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/rpn/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | # from .rpn import build_rpn
3 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/rpn/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | """
 3 | Utility functions manipulating the prediction layers
 4 | """
 5 | 
 6 | from ..utils import cat
 7 | 
 8 | import torch
 9 | 
def permute_and_flatten(layer, N, A, C, H, W):
    """
    Rearrange a prediction map into shape (N, -1, C).

    The input is viewed as (N, -1, C, H, W), the two spatial axes are moved
    directly after the batch axis, and everything between the batch axis and
    the trailing C axis is flattened. `A` is accepted but not used.
    """
    grouped = layer.view(N, -1, C, H, W)
    spatial_first = grouped.permute(0, 3, 4, 1, 2)
    return spatial_first.reshape(N, -1, C)
15 | 
16 | 
def concat_box_prediction_layers(box_cls, box_regression):
    """
    Flatten per-level class and box-regression maps and join them.

    Each level is permuted and flattened so its layout matches the labels,
    which are computed for all feature levels concatenated. The levels are
    then concatenated along dim 1 and reshaped to (-1, C) for the class
    predictions and (-1, 4) for the box regression.
    """
    cls_per_level = []
    reg_per_level = []
    for level_cls, level_reg in zip(box_cls, box_regression):
        N, AxC, H, W = level_cls.shape
        # the regression map has 4 channels per anchor
        A = level_reg.shape[1] // 4
        C = AxC // A
        cls_per_level.append(permute_and_flatten(level_cls, N, A, C, H, W))
        reg_per_level.append(permute_and_flatten(level_reg, N, A, 4, H, W))

    # concatenate over the feature levels, mirroring how the labels were
    # generated (with all feature maps concatenated as well)
    flat_cls = cat(cls_per_level, dim=1).reshape(-1, C)
    flat_reg = cat(reg_per_level, dim=1).reshape(-1, 4)
    return flat_cls, flat_reg
46 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | """
 3 | Miscellaneous utility functions
 4 | """
 5 | 
 6 | import torch
 7 | 
 8 | 
 9 | def cat(tensors, dim=0):
10 |     """
11 |     Efficient version of torch.cat that avoids a copy if there is only a single element in a list
12 |     """
13 |     assert isinstance(tensors, (list, tuple))
14 |     if len(tensors) == 1:
15 |         return tensors[0]
16 |     return torch.cat(tensors, dim)
17 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/solver/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .build import make_optimizer
3 | from .build import make_lr_scheduler
4 | from .lr_scheduler import WarmupMultiStepLR
5 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/solver/build.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | 
 4 | from .lr_scheduler import WarmupMultiStepLR
 5 | 
 6 | 
 7 | def make_optimizer(cfg, model):
 8 |     params = []
 9 |     for key, value in model.named_parameters():
10 |         if not value.requires_grad:
11 |             continue
12 |         lr = cfg.SOLVER.BASE_LR
13 |         weight_decay = cfg.SOLVER.WEIGHT_DECAY
14 |         if "bias" in key:
15 |             lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR
16 |             weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
17 |         params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}]
18 | 
19 |     optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM)
20 |     return optimizer
21 | 
22 | 
def make_lr_scheduler(cfg, optimizer):
    """
    Build a WarmupMultiStepLR for `optimizer` from the cfg.SOLVER settings
    (STEPS, GAMMA, WARMUP_FACTOR, WARMUP_ITERS, WARMUP_METHOD).
    """
    solver = cfg.SOLVER
    milestones = solver.STEPS
    gamma = solver.GAMMA
    warmup_options = dict(
        warmup_factor=solver.WARMUP_FACTOR,
        warmup_iters=solver.WARMUP_ITERS,
        warmup_method=solver.WARMUP_METHOD,
    )
    return WarmupMultiStepLR(optimizer, milestones, gamma, **warmup_options)
32 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/solver/lr_scheduler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from bisect import bisect_right
 3 | 
 4 | import torch
 5 | 
 6 | 
 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler,
 8 | # separating MultiStepLR with WarmupLR
 9 | # but the current LRScheduler design doesn't allow it
class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    """
    Multi-step learning-rate schedule with an initial warmup phase.

    For each base lr the scheduler yields
    `base_lr * warmup_factor * gamma ** k`, where `k` is the number of
    milestones that are <= `last_epoch`. The warmup factor is 1 once
    `last_epoch >= warmup_iters`. Before that it is the constant
    `warmup_factor` ("constant" method) or a linear ramp from
    `warmup_factor` up to 1 ("linear" method).
    """

    def __init__(
        self,
        optimizer,
        milestones,
        gamma=0.1,
        warmup_factor=1.0 / 3,
        warmup_iters=500,
        warmup_method="linear",
        last_epoch=-1,
    ):
        """
        Arguments:
            optimizer: the wrapped optimizer
            milestones: increasing step indices at which the lr is multiplied
                by `gamma`
            gamma (float): multiplicative decay applied at each milestone
            warmup_factor (float): lr multiplier used at the start of warmup
            warmup_iters (int): number of warmup steps
            warmup_method (str): "constant" or "linear"
            last_epoch (int): forwarded to the base scheduler

        Raises:
            ValueError: if `milestones` is not sorted, or `warmup_method` is
                not one of the accepted values.
        """
        if not list(milestones) == sorted(milestones):
            # format the offending value into the message instead of passing
            # it as a second exception argument
            raise ValueError(
                "Milestones should be a list of increasing integers. "
                "Got {}".format(milestones)
            )

        if warmup_method not in ("constant", "linear"):
            raise ValueError(
                "Only 'constant' or 'linear' warmup_method accepted, "
                "got {}".format(warmup_method)
            )
        # attributes must exist before the base __init__ runs, since the base
        # class is expected to call get_lr() during construction
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the list of learning rates for the current `last_epoch`."""
        warmup_factor = 1
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == "constant":
                warmup_factor = self.warmup_factor
            elif self.warmup_method == "linear":
                # interpolate from self.warmup_factor (alpha=0) to 1 (alpha=1)
                alpha = float(self.last_epoch) / self.warmup_iters
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
        # bisect_right counts the milestones already reached
        decay = self.gamma ** bisect_right(self.milestones, self.last_epoch)
        return [base_lr * warmup_factor * decay for base_lr in self.base_lrs]
53 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/structures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/structures/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/structures/image_list.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from __future__ import division
 3 | 
 4 | import torch
 5 | 
 6 | 
 7 | class ImageList(object):
 8 |     """
 9 |     Structure that holds a list of images (of possibly
10 |     varying sizes) as a single tensor.
11 |     This works by padding the images to the same size,
12 |     and storing in a field the original sizes of each image
13 |     """
14 | 
15 |     def __init__(self, tensors, image_sizes):
16 |         """
17 |         Arguments:
18 |             tensors (tensor)
19 |             image_sizes (list[tuple[int, int]])
20 |         """
21 |         self.tensors = tensors
22 |         self.image_sizes = image_sizes
23 | 
24 |     def to(self, *args, **kwargs):
25 |         cast_tensor = self.tensors.to(*args, **kwargs)
26 |         return ImageList(cast_tensor, self.image_sizes)
27 | 
28 | 
def to_image_list(tensors, size_divisible=0):
    """
    Convert the input into an ImageList.

    `tensors` can be an ImageList (returned unchanged), a torch.Tensor or a
    tuple/list of Tensors. It can't be a numpy array. A tuple/list of
    Tensors is zero-padded to a common shape; when `size_divisible` is
    positive, dims 1 and 2 of that shape are rounded up to a multiple of it.
    A single Tensor passed with a positive `size_divisible` is treated as a
    one-element list.

    Raises:
        TypeError: for any other input type.
    """
    if isinstance(tensors, torch.Tensor) and size_divisible > 0:
        tensors = [tensors]

    if isinstance(tensors, ImageList):
        return tensors

    if isinstance(tensors, torch.Tensor):
        # single tensor shape can be inferred
        if tensors.dim() == 3:
            tensors = tensors[None]
        assert tensors.dim() == 4
        return ImageList(tensors, [tensor.shape[-2:] for tensor in tensors])

    if not isinstance(tensors, (tuple, list)):
        raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors)))

    # element-wise maximum over the shapes of all images
    max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors]))

    # TODO Ideally, just remove this and let the model handle arbitrary
    # input sizes
    if size_divisible > 0:
        import math

        stride = size_divisible
        padded = list(max_size)
        for axis in (1, 2):
            padded[axis] = int(math.ceil(padded[axis] / stride) * stride)
        max_size = tuple(padded)

    batch_shape = (len(tensors),) + max_size
    batched_imgs = tensors[0].new(*batch_shape).zero_()
    # copy every image into the top-left corner of its zero-filled slot
    for index, img in enumerate(tensors):
        slot = batched_imgs[index]
        slot[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)

    return ImageList(batched_imgs, [im.shape[-2:] for im in tensors])
73 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/README.md:
--------------------------------------------------------------------------------
1 | # Utility functions
2 | 
3 | This folder contains utility functions that are not used in the
4 | core library, but are useful for building models or training
5 | code using the config system.
6 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lzrobots/dgmn/515476b5c6a07dcc3b7a4d2243c541377624bb33/maskrcnn_benchmark/utils/__init__.py


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/collect_env.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import PIL
 3 | 
 4 | from torch.utils.collect_env import get_pretty_env_info
 5 | 
 6 | 
 7 | def get_pil_version():
 8 |     return "\n        Pillow ({})".format(PIL.__version__)
 9 | 
10 | 
def collect_env_info():
    """Return torch's environment report followed by the Pillow version line."""
    return get_pretty_env_info() + get_pil_version()
15 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/cv2_util.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Module for cv2 utility functions and maintaining version compatibility
 3 | between 3.x and 4.x
 4 | """
 5 | import cv2
 6 | 
 7 | 
 8 | def findContours(*args, **kwargs):
 9 |     """
10 |     Wraps cv2.findContours to maintain compatiblity between versions
11 |     3 and 4
12 | 
13 |     Returns:
14 |         contours, hierarchy
15 |     """
16 |     if cv2.__version__.startswith('4'):
17 |         contours, hierarchy = cv2.findContours(*args, **kwargs)
18 |     elif cv2.__version__.startswith('3'):
19 |         _, contours, hierarchy = cv2.findContours(*args, **kwargs)
20 |     else:
21 |         raise AssertionError(
22 |             'cv2 must be either version 3 or 4 to call this method')
23 | 
24 |     return contours, hierarchy
25 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/env.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import os
 3 | 
 4 | from maskrcnn_benchmark.utils.imports import import_file
 5 | 
 6 | 
 7 | def setup_environment():
 8 |     """Perform environment setup work. The default setup is a no-op, but this
 9 |     function allows the user to specify a Python source file that performs
10 |     custom setup work that may be necessary to their computing environment.
11 |     """
12 |     custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE")
13 |     if custom_module_path:
14 |         setup_custom_environment(custom_module_path)
15 |     else:
16 |         # The default setup is a no-op
17 |         pass
18 | 
19 | 
def setup_custom_environment(custom_module_path):
    """Load the Python source file at custom_module_path and call the
    setup_environment() function it defines.
    """
    module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path)
    # a missing attribute and a non-callable attribute are rejected alike
    setup_fn = getattr(module, "setup_environment", None)
    assert callable(setup_fn), (
        "Custom environment module defined in {} does not have the "
        "required callable attribute 'setup_environment'."
    ).format(custom_module_path)
    setup_fn()
34 | 
35 | 
36 | # Force environment setup when this module is imported
37 | setup_environment()
38 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/imports.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | 
 4 | if torch._six.PY3:
 5 |     import importlib
 6 |     import importlib.util
 7 |     import sys
 8 | 
 9 | 
10 |     # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
11 |     def import_file(module_name, file_path, make_importable=False):
12 |         spec = importlib.util.spec_from_file_location(module_name, file_path)
13 |         module = importlib.util.module_from_spec(spec)
14 |         spec.loader.exec_module(module)
15 |         if make_importable:
16 |             sys.modules[module_name] = module
17 |         return module
18 | else:
19 |     import imp
20 | 
21 |     def import_file(module_name, file_path, make_importable=None):
22 |         module = imp.load_source(module_name, file_path)
23 |         return module
24 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/logger.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import logging
 3 | import os
 4 | import sys
 5 | 
 6 | 
 7 | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"):
 8 |     logger = logging.getLogger(name)
 9 |     logger.setLevel(logging.DEBUG)
10 |     # don't log results for the non-master process
11 |     if distributed_rank > 0:
12 |         return logger
13 |     ch = logging.StreamHandler(stream=sys.stdout)
14 |     ch.setLevel(logging.DEBUG)
15 |     formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s")
16 |     ch.setFormatter(formatter)
17 |     logger.addHandler(ch)
18 | 
19 |     if save_dir:
20 |         fh = logging.FileHandler(os.path.join(save_dir, filename))
21 |         fh.setLevel(logging.DEBUG)
22 |         fh.setFormatter(formatter)
23 |         logger.addHandler(fh)
24 | 
25 |     return logger
26 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/metric_logger.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from collections import defaultdict
 3 | from collections import deque
 4 | 
 5 | import torch
 6 | 
 7 | 
 8 | class SmoothedValue(object):
 9 |     """Track a series of values and provide access to smoothed values over a
10 |     window or the global series average.
11 |     """
12 | 
13 |     def __init__(self, window_size=20):
14 |         self.deque = deque(maxlen=window_size)
15 |         self.series = []
16 |         self.total = 0.0
17 |         self.count = 0
18 | 
19 |     def update(self, value):
20 |         self.deque.append(value)
21 |         self.series.append(value)
22 |         self.count += 1
23 |         self.total += value
24 | 
25 |     @property
26 |     def median(self):
27 |         d = torch.tensor(list(self.deque))
28 |         return d.median().item()
29 | 
30 |     @property
31 |     def avg(self):
32 |         d = torch.tensor(list(self.deque))
33 |         return d.mean().item()
34 | 
35 |     @property
36 |     def global_avg(self):
37 |         return self.total / self.count
38 | 
39 | 
40 | class MetricLogger(object):
41 |     def __init__(self, delimiter="\t"):
42 |         self.meters = defaultdict(SmoothedValue)
43 |         self.delimiter = delimiter
44 | 
45 |     def update(self, **kwargs):
46 |         for k, v in kwargs.items():
47 |             if isinstance(v, torch.Tensor):
48 |                 v = v.item()
49 |             assert isinstance(v, (float, int))
50 |             self.meters[k].update(v)
51 | 
52 |     def __getattr__(self, attr):
53 |         if attr in self.meters:
54 |             return self.meters[attr]
55 |         if attr in self.__dict__:
56 |             return self.__dict__[attr]
57 |         raise AttributeError("'{}' object has no attribute '{}'".format(
58 |                     type(self).__name__, attr))
59 | 
60 |     def __str__(self):
61 |         loss_str = []
62 |         for name, meter in self.meters.items():
63 |             loss_str.append(
64 |                 "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg)
65 |             )
66 |         return self.delimiter.join(loss_str)
67 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/miscellaneous.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import errno
 3 | import os
 4 | 
 5 | 
 6 | def mkdir(path):
 7 |     try:
 8 |         os.makedirs(path)
 9 |     except OSError as e:
10 |         if e.errno != errno.EEXIST:
11 |             raise
12 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | 
 4 | def _register_generic(module_dict, module_name, module):
 5 |     assert module_name not in module_dict
 6 |     module_dict[module_name] = module
 7 | 
 8 | 
 9 | class Registry(dict):
10 |     '''
11 |     A helper class for managing registering modules, it extends a dictionary
12 |     and provides a register functions.
13 | 
14 |     Eg. creeting a registry:
15 |         some_registry = Registry({"default": default_module})
16 | 
17 |     There're two ways of registering new modules:
18 |     1): normal way is just calling register function:
19 |         def foo():
20 |             ...
21 |         some_registry.register("foo_module", foo)
22 |     2): used as decorator when declaring the module:
23 |         @some_registry.register("foo_module")
24 |         @some_registry.register("foo_modeul_nickname")
25 |         def foo():
26 |             ...
27 | 
28 |     Access of module is just like using a dictionary, eg:
29 |         f = some_registry["foo_modeul"]
30 |     '''
31 |     def __init__(self, *args, **kwargs):
32 |         super(Registry, self).__init__(*args, **kwargs)
33 | 
34 |     def register(self, module_name, module=None):
35 |         # used as function call
36 |         if module is not None:
37 |             _register_generic(self, module_name, module)
38 |             return
39 | 
40 |         # used as decorator
41 |         def register_fn(fn):
42 |             _register_generic(self, module_name, fn)
43 |             return fn
44 | 
45 |         return register_fn
46 | 


--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/timer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | 
 4 | import time
 5 | import datetime
 6 | 
 7 | 
 8 | class Timer(object):
 9 |     def __init__(self):
10 |         self.reset()
11 | 
12 |     @property
13 |     def average_time(self):
14 |         return self.total_time / self.calls if self.calls > 0 else 0.0
15 | 
16 |     def tic(self):
17 |         # using time.time instead of time.clock because time time.clock
18 |         # does not normalize for multithreading
19 |         self.start_time = time.time()
20 | 
21 |     def toc(self, average=True):
22 |         self.add(time.time() - self.start_time)
23 |         if average:
24 |             return self.average_time
25 |         else:
26 |             return self.diff
27 | 
28 |     def add(self, time_diff):
29 |         self.diff = time_diff
30 |         self.total_time += self.diff
31 |         self.calls += 1
32 | 
33 |     def reset(self):
34 |         self.total_time = 0.0
35 |         self.calls = 0
36 |         self.start_time = 0.0
37 |         self.diff = 0.0
38 | 
39 |     def avg_time_str(self):
40 |         time_str = str(datetime.timedelta(seconds=self.average_time))
41 |         return time_str
42 | 
43 | 
def get_time_str(time_diff):
    """Format a duration given in seconds the way datetime.timedelta prints it."""
    return str(datetime.timedelta(seconds=time_diff))
47 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ninja
2 | yacs
3 | cython
4 | matplotlib
5 | tqdm
6 | 


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | #export NGPUS=8
4 | #python -m torch.distributed.launch --nproc_per_node=$NGPUS ./tools/train_net.py --config-file "./configs/dw/e2e_mask_rcnn_R_50_FPN_2x.yaml"
5 | 
6 | 
7 | export NGPUS=8
8 | python -m torch.distributed.launch --nproc_per_node=$NGPUS ./tools/train_net.py --config-file "./configs/dw/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml"
9 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #!/usr/bin/env python
 3 | 
 4 | import glob
 5 | import os
 6 | 
 7 | import torch
 8 | from setuptools import find_packages
 9 | from setuptools import setup
10 | from torch.utils.cpp_extension import CUDA_HOME
11 | from torch.utils.cpp_extension import CppExtension
12 | from torch.utils.cpp_extension import CUDAExtension
13 | 
14 | requirements = ["torch", "torchvision"]
15 | 
16 | 
def get_extensions():
    """Describe the native `maskrcnn_benchmark._C` extension for setuptools.

    The C++ sources under maskrcnn_benchmark/csrc are always compiled. The
    CUDA sources and the WITH_CUDA macro are added when CUDA is usable
    (torch reports it available and CUDA_HOME is set) or when the FORCE_CUDA
    environment variable equals "1".

    Returns:
        A one-element list holding the extension object.
    """
    root = os.path.dirname(os.path.abspath(__file__))
    extensions_dir = os.path.join(root, "maskrcnn_benchmark", "csrc")

    def find(*pattern):
        return glob.glob(os.path.join(extensions_dir, *pattern))

    top_level_sources = find("*.cpp")
    cpu_sources = find("cpu", "*.cpp")
    cuda_sources = find("cuda", "*.cu")

    sources = top_level_sources + cpu_sources
    extension_cls = CppExtension
    extra_compile_args = {"cxx": []}
    define_macros = []

    cuda_usable = torch.cuda.is_available() and CUDA_HOME is not None
    if cuda_usable or os.getenv("FORCE_CUDA", "0") == "1":
        extension_cls = CUDAExtension
        sources += cuda_sources
        define_macros += [("WITH_CUDA", None)]
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]

    sources = [os.path.join(extensions_dir, source) for source in sources]

    return [
        extension_cls(
            "maskrcnn_benchmark._C",
            sources,
            include_dirs=[extensions_dir],
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        )
    ]
57 | 
58 | 
# Package metadata and build configuration. The native extension described by
# get_extensions() is built with torch's BuildExtension command. The
# `requirements` list above is not passed on (install_requires is disabled).
setup(
    name="maskrcnn_benchmark",
    version="0.1",
    author="fmassa",
    url="https://github.com/facebookresearch/maskrcnn-benchmark",
    description="object detection in pytorch",
    packages=find_packages(exclude=("configs", "tests",)),
    # install_requires=requirements,
    ext_modules=get_extensions(),
    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
)
70 | 


--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Usage:
# ./test.sh
# Takes no arguments: the config file, the output directory and the list of
# checkpoint iterations are hard-coded below. Runs tools/test_net.py once per
# checkpoint model_<iteration>.pth, using NGPUS processes.
export NGPUS=4
for i in 0002500 0005000 0007500 0010000 0012500 0015000 0017500 0020000 0022500 0025000 0027500 0030000 0032500 0035000 0037500 0040000 0042500 0045000 0047500 0050000 0052500 0055000 0057500 0060000 0062500 0065000 0067500 0070000 0072500 0075000 0077500 0080000 0082500 0085000 0087500 0090000; do \
python -m torch.distributed.launch --nproc_per_node=$NGPUS \
    tools/test_net.py \
    --config-file "configs/e2e_mask_rcnn_R_50_FPN_1x.yaml" \
    OUTPUT_DIR "models/e2e_mask_rcnn_R_50_FPN_1x" \
    MODEL.WEIGHT "models/e2e_mask_rcnn_R_50_FPN_1x/model_${i}.pth" \
    TEST.IMS_PER_BATCH 16; \
    done
13 | 
14 | #for i in 0090000 0002500 0005000 0007500 0010000 0012500 0015000 0017500 0020000 0022500 0025000 0027500 0030000 0032500 0035000 0037500 0040000 0042500 0045000 0047500 0050000 0052500 0055000 0057500 0060000 0062500 0065000 0067500 0070000 0072500 0075000 0077500 0080000 0082500 0085000 0087500 0090000; do \
15 | #python -m torch.distributed.launch --nproc_per_node=$NGPUS \
16 | #    tools/test_net.py \
17 | #    --config-file "configs/dw/e2e_mask_rcnn_R_50_FPN_2x.yaml" \
18 | #    OUTPUT_DIR "models/e2e_mask_rcnn_R_50_FPN_001da_33_re_2x" \
19 | #    MODEL.WEIGHT "models/e2e_mask_rcnn_R_50_FPN_001da_33_re_2x/model_${i}.pth" \
20 | #    TEST.IMS_PER_BATCH 16; \
21 | #    done
22 | #    MODEL.WEIGHT "/scratch_new/engs1870/maskrcnn/e2e_mask_rcnn_R_50_FPN_001da_33_re_2x/model_${i}.pth" \
23 | 
24 | 
25 | 
26 | #for i in 0002500 0005000 0007500 0010000 0012500 0015000 0017500 0020000 0022500 0025000 0027500 0030000 0032500 0035000 0037500 0040000 0042500 0045000 0047500 0050000 0052500 0055000 0057500 0060000 0062500 0065000 0067500 0070000 0072500 0075000 0077500 0080000 0082500 0085000 0087500 0090000; do \
27 | #python -m torch.distributed.launch --nproc_per_node=$NGPUS \
28 | #    tools/test_net.py \
29 | #    --config-file "configs/e2e_mask_rcnn_R_50_FPN_1x.yaml" \
30 | #    OUTPUT_DIR "models/Non-local" \
31 | #    MODEL.WEIGHT "models/Non-local/model_${i}.pth" \
32 | #    TEST.IMS_PER_BATCH 16; \
33 | #    done


--------------------------------------------------------------------------------
/tests/env_tests/env.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import os
 4 | 
 5 | 
 6 | def get_config_root_path():
 7 |     ''' Path to configs for unit tests '''
 8 |     # cur_file_dir is root/tests/env_tests
 9 |     cur_file_dir = os.path.dirname(os.path.abspath(os.path.realpath(__file__)))
10 |     ret = os.path.dirname(os.path.dirname(cur_file_dir))
11 |     ret = os.path.join(ret, "configs")
12 |     return ret
13 | 


--------------------------------------------------------------------------------
/tests/test_backbones.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import unittest
 4 | import copy
 5 | import torch
 6 | # import modules to register backbones
 7 | from maskrcnn_benchmark.modeling.backbone import build_backbone # NoQA
 8 | from maskrcnn_benchmark.modeling import registry
 9 | from maskrcnn_benchmark.config import cfg as g_cfg
10 | from utils import load_config
11 | 
12 | 
# overwrite configs if specified, otherwise default config is used
# Maps a backbone registry name to a config file path relative to the
# config root.
# NOTE(review): "R-152-FPN" points at the R-101 config file; confirm this is
# intended.
BACKBONE_CFGS = {
    "R-50-FPN": "e2e_faster_rcnn_R_50_FPN_1x.yaml",
    "R-101-FPN": "e2e_faster_rcnn_R_101_FPN_1x.yaml",
    "R-152-FPN": "e2e_faster_rcnn_R_101_FPN_1x.yaml",
    "R-50-FPN-RETINANET": "retinanet/retinanet_R-50-FPN_1x.yaml",
    "R-101-FPN-RETINANET": "retinanet/retinanet_R-101-FPN_1x.yaml",
}
21 | 
22 | 
class TestBackbones(unittest.TestCase):
    def test_build_backbones(self):
        ''' Make sure backbones run '''

        self.assertGreater(len(registry.BACKBONES), 0)

        batch, channels, height, width = 2, 3, 224, 256

        for name, backbone_builder in registry.BACKBONES.items():
            print('Testing {}...'.format(name))
            # Use the config listed for this backbone, or the default config
            # if no config file is specified
            cfg = (
                load_config(BACKBONE_CFGS[name])
                if name in BACKBONE_CFGS
                else copy.deepcopy(g_cfg)
            )
            backbone = backbone_builder(cfg)

            # every backbone has to advertise `out_channels`
            self.assertIsNotNone(
                getattr(backbone, 'out_channels', None),
                'Need to provide out_channels for backbone {}'.format(name)
            )

            images = torch.rand(
                [batch, channels, height, width], dtype=torch.float32)
            feature_maps = backbone(images)
            expected_leading_dims = torch.Size([batch, backbone.out_channels])
            for feature_map in feature_maps:
                self.assertEqual(feature_map.shape[:2], expected_leading_dims)
52 | 
53 | 
54 | if __name__ == "__main__":
55 |     unittest.main()
56 | 


--------------------------------------------------------------------------------
/tests/test_configs.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import unittest
 4 | import glob
 5 | import os
 6 | import utils
 7 | 
 8 | 
 9 | class TestConfigs(unittest.TestCase):
10 |     def test_configs_load(self):
11 |         ''' Make sure configs are loadable '''
12 | 
13 |         cfg_root_path = utils.get_config_root_path()
14 |         files = glob.glob(
15 |             os.path.join(cfg_root_path, "./**/*.yaml"), recursive=True)
16 |         self.assertGreater(len(files), 0)
17 | 
18 |         for fn in files:
19 |             print('Loading {}...'.format(fn))
20 |             utils.load_config_from_file(fn)
21 | 
22 | 
23 | if __name__ == "__main__":
24 |     unittest.main()
25 | 


--------------------------------------------------------------------------------
/tests/test_metric_logger.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import unittest
 3 | 
 4 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger
 5 | 
 6 | 
 7 | class TestMetricLogger(unittest.TestCase):
 8 |     def test_update(self):
 9 |         meter = MetricLogger()
10 |         for i in range(10):
11 |             meter.update(metric=float(i))
12 |         
13 |         m = meter.meters["metric"]
14 |         self.assertEqual(m.count, 10)
15 |         self.assertEqual(m.total, 45)
16 |         self.assertEqual(m.median, 4)
17 |         self.assertEqual(m.avg, 4.5)
18 | 
19 |     def test_no_attr(self):
20 |         meter = MetricLogger()
21 |         _ = meter.meters
22 |         _ = meter.delimiter
23 |         def broken():
24 |             _ = meter.not_existent
25 |         self.assertRaises(AttributeError, broken)
26 | 
27 | if __name__ == "__main__":
28 |     unittest.main()
29 | 


--------------------------------------------------------------------------------
/tests/test_rpn_heads.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import unittest
 4 | import copy
 5 | import torch
 6 | # import modules to register rpn heads
 7 | from maskrcnn_benchmark.modeling.backbone import build_backbone # NoQA
 8 | from maskrcnn_benchmark.modeling.rpn.rpn import build_rpn # NoQA
 9 | from maskrcnn_benchmark.modeling import registry
10 | from maskrcnn_benchmark.config import cfg as g_cfg
11 | from utils import load_config
12 | 
13 | 
# overwrite configs if specified, otherwise default config is used
# (currently empty, so every registered RPN head is built from the default
# config)
RPN_CFGS = {
}
17 | 
18 | 
class TestRPNHeads(unittest.TestCase):
    def test_build_rpn_heads(self):
        ''' Make sure rpn heads run '''

        self.assertGreater(len(registry.RPN_HEADS), 0)

        in_channels = 64
        num_anchors = 10
        num_levels = 3

        for name, builder in registry.RPN_HEADS.items():
            print('Testing {}...'.format(name))
            # Use the config listed for this head, or the default config
            # if no config file is specified
            cfg = (
                load_config(RPN_CFGS[name])
                if name in RPN_CFGS
                else copy.deepcopy(g_cfg)
            )

            rpn = builder(cfg, in_channels, num_anchors)

            features = torch.rand([2, in_channels, 24, 32], dtype=torch.float32)
            out = rpn([features] * num_levels)
            self.assertEqual(len(out), 2)
            logits, bbox_reg = out
            for level in range(num_levels):
                level_logits = logits[level]
                # one objectness score per anchor at every spatial position
                self.assertEqual(
                    level_logits.shape,
                    torch.Size([
                        features.shape[0], num_anchors,
                        features.shape[2], features.shape[3],
                    ])
                )
                # four regression values per anchor, same batch / spatial dims
                self.assertEqual(
                    bbox_reg[level].shape,
                    torch.Size([
                        level_logits.shape[0], num_anchors * 4,
                        level_logits.shape[2], level_logits.shape[3],
                    ]),
                )
59 | 
60 | 
61 | if __name__ == "__main__":
62 |     unittest.main()
63 | 


--------------------------------------------------------------------------------
/tests/test_segmentation_mask.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import unittest
 3 | import torch
 4 | from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask
 5 | 
 6 | 
 7 | class TestSegmentationMask(unittest.TestCase):
 8 |     def __init__(self, method_name='runTest'):
 9 |         super(TestSegmentationMask, self).__init__(method_name)
10 |         poly = [[[423.0, 306.5, 406.5, 277.0, 400.0, 271.5, 389.5, 277.0,
11 |                   387.5, 292.0, 384.5, 295.0, 374.5, 220.0, 378.5, 210.0,
12 |                   391.0, 200.5, 404.0, 199.5, 414.0, 203.5, 425.5, 221.0,
13 |                   438.5, 297.0, 423.0, 306.5],
14 |                  [100, 100,     200, 100,     200, 200,     100, 200],
15 |                 ]]
16 |         width = 640
17 |         height = 480
18 |         size = width, height
19 | 
20 |         self.P = SegmentationMask(poly, size, 'poly')
21 |         self.M = SegmentationMask(poly, size, 'poly').convert('mask')
22 | 
23 | 
24 |     def L1(self, A, B):
25 |         diff = A.get_mask_tensor() - B.get_mask_tensor()
26 |         diff = torch.sum(torch.abs(diff.float())).item()
27 |         return diff
28 | 
29 | 
30 |     def test_convert(self):
31 |         M_hat = self.M.convert('poly').convert('mask')
32 |         P_hat = self.P.convert('mask').convert('poly')
33 | 
34 |         diff_mask = self.L1(self.M, M_hat)
35 |         diff_poly = self.L1(self.P, P_hat)
36 |         self.assertTrue(diff_mask == diff_poly)
37 |         self.assertTrue(diff_mask <= 8169.)
38 |         self.assertTrue(diff_poly <= 8169.)
39 | 
40 | 
41 |     def test_crop(self):
42 |         box = [400, 250, 500, 300] # xyxy
43 |         diff = self.L1(self.M.crop(box), self.P.crop(box))
44 |         self.assertTrue(diff <= 1.)
45 | 
46 | 
47 |     def test_resize(self):
48 |         new_size = 50, 25
49 |         M_hat = self.M.resize(new_size)
50 |         P_hat = self.P.resize(new_size)
51 |         diff = self.L1(M_hat, P_hat)
52 | 
53 |         self.assertTrue(self.M.size == self.P.size)
54 |         self.assertTrue(M_hat.size == P_hat.size)
55 |         self.assertTrue(self.M.size != M_hat.size)
56 |         self.assertTrue(diff <= 255.)
57 | 
58 | 
59 |     def test_transpose(self):
60 |         FLIP_LEFT_RIGHT = 0
61 |         FLIP_TOP_BOTTOM = 1
62 |         diff_hor = self.L1(self.M.transpose(FLIP_LEFT_RIGHT),
63 |                            self.P.transpose(FLIP_LEFT_RIGHT))
64 | 
65 |         diff_ver = self.L1(self.M.transpose(FLIP_TOP_BOTTOM),
66 |                            self.P.transpose(FLIP_TOP_BOTTOM))
67 | 
68 |         self.assertTrue(diff_hor <= 53250.)
69 |         self.assertTrue(diff_ver <= 42494.)
70 | 
71 | 
72 | if __name__ == "__main__":
73 | 
74 |     unittest.main()
75 | 


--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function, unicode_literals
 2 | 
 3 | # Set up custom environment before nearly anything else is imported
 4 | # NOTE: this should be the first import (do not reorder)
 5 | from maskrcnn_benchmark.utils.env import setup_environment  # noqa F401 isort:skip
 6 | import env_tests.env as env_tests
 7 | 
 8 | import os
 9 | import copy
10 | 
11 | from maskrcnn_benchmark.config import cfg as g_cfg
12 | 
13 | 
def get_config_root_path():
    ''' Return the config root directory reported by env_tests '''
    config_root = env_tests.get_config_root_path()
    return config_root
16 | 
17 | 
def load_config(rel_path):
    ''' Load the config file found at rel_path below the config root '''
    config_root = env_tests.get_config_root_path()
    return load_config_from_file(os.path.join(config_root, rel_path))
22 | 
23 | 
def load_config_from_file(file_path):
    ''' Return a copy of the default config with the file at file_path merged in '''
    # work on a deep copy so the shared default config is left untouched
    merged_cfg = copy.deepcopy(g_cfg)
    merged_cfg.merge_from_file(file_path)
    return merged_cfg
29 | 


--------------------------------------------------------------------------------
/tools/cityscapes/instances2dict_with_polygons.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | #
 3 | # Convert instances from png files to a dictionary
 4 | # This file was created according to https://github.com/facebookresearch/Detectron/issues/111
 5 | 
 6 | from __future__ import print_function, absolute_import, division
 7 | import os, sys
 8 | 
 9 | sys.path.append( os.path.normpath( os.path.join( os.path.dirname( __file__ ) , '..' , 'helpers' ) ) )
10 | from csHelpers import *
11 | 
12 | # Cityscapes imports
13 | from cityscapesscripts.evaluation.instance import *
14 | from cityscapesscripts.helpers.csHelpers import *
15 | import cv2
16 | from maskrcnn_benchmark.utils import cv2_util
17 | 
18 | 
def instances2dict_with_polygons(imageFileList, verbose=False):
    """Convert instance png files into a nested dictionary.

    Args:
        imageFileList: one image file name or a list of them.
        verbose: when true, progress is printed to stdout.

    Returns:
        A dict keyed by the absolute path of each image. Each value maps a
        label name to the list of instance dicts found in that image;
        instances whose label has `hasInstances` set also carry a
        'contours' entry with their outer contours as flat coordinate lists.
    """
    if not isinstance(imageFileList, list):
        imageFileList = [imageFileList]

    if verbose:
        print("Processing {} images...".format(len(imageFileList)))

    instanceDict = {}
    for processed, imageFileName in enumerate(imageFileList, start=1):
        # Load the image as a numpy array
        imgNp = np.array(Image.open(imageFileName))

        # Start with an empty list for every label category
        instances = {label.name: [] for label in labels}

        # Loop through all instance ids in the instance image
        for instanceId in np.unique(imgNp):
            # ids below 1000 are skipped
            if instanceId < 1000:
                continue
            instanceObj = Instance(imgNp, instanceId)
            record = instanceObj.toDict()
            instanceLabel = id2label[instanceObj.labelID]

            if instanceLabel.hasInstances:
                mask = (imgNp == instanceId).astype(np.uint8)
                contours, _ = cv2_util.findContours(
                    mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
                record['contours'] = [c.reshape(-1).tolist() for c in contours]

            instances[instanceLabel.name].append(record)

        instanceDict[os.path.abspath(imageFileName)] = instances

        if verbose:
            print("\rImages Processed: {}".format(processed), end=' ')
            sys.stdout.flush()

    if verbose:
        print("")

    return instanceDict
71 | 
def main(argv):
    """Convert every png file named on the command line.

    Args:
        argv: command-line arguments without the program name.

    Every argument containing "png" is processed. The former
    `len(argv) > 2` guard silently discarded all files whenever only one or
    two arguments were given.
    """
    fileList = [arg for arg in argv if "png" in arg]
    instances2dict_with_polygons(fileList, True)
79 | 
80 | if __name__ == "__main__":
81 |     main(sys.argv[1:])
82 | 


--------------------------------------------------------------------------------