├── ABSTRACTIONS.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── INSTALL.md
├── MODEL_ZOO.md
├── README.md
├── TROUBLESHOOTING.md
├── configs
│   ├── cityscapes
│   │   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
│   │   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle_order.yaml
│   │   ├── panoptic_8gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
│   │   ├── panoptic_8gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle_dc.yaml
│   │   ├── panoptic_8gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle_dc_order.yaml
│   │   └── panoptic_e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
│   ├── panoptic_2gpu_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.yaml
│   ├── panoptic_2gpu_mask_rcnn_R_50_FPN_1x_test_dev.yaml
│   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x.yaml
│   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_fusion.yaml
│   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_order.yaml
│   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_order_nodense.yaml
│   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.yaml
│   ├── panoptic_4gpu_mask_rcnn_R_101_FPN_1x_test_dev.yaml
│   ├── panoptic_4gpu_mask_rcnn_R_50_FPN_1x_test_dev.yaml
│   ├── panoptic_e2e_mask_rcnn_R_101_FPN_1x_order.yaml
│   ├── panoptic_e2e_mask_rcnn_R_50_FPN_1x.yaml
│   ├── panoptic_e2e_mask_rcnn_R_50_FPN_1x_fusion.yaml
│   ├── panoptic_e2e_mask_rcnn_R_50_FPN_1x_order.yaml
│   ├── panoptic_e2e_mask_rcnn_R_50_FPN_1x_order_test.yaml
│   ├── panoptic_e2e_mask_rcnn_R_50_FPN_1x_order_val.yaml
│   ├── panoptic_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.py
│   └── panoptic_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.yaml
├── maskrcnn_benchmark
│   ├── __init__.py
│   ├── config
│   │   ├── __init__.py
│   │   ├── defaults.py
│   │   └── paths_catalog.py
│   ├── csrc
│   │   ├── ROIAlign.h
│   │   ├── ROIPool.h
│   │   ├── cpu
│   │   │   ├── ROIAlign_cpu.cpp
│   │   │   ├── nms_cpu.cpp
│   │   │   └── vision.h
│   │   ├── cuda
│   │   │   ├── ROIAlign_cuda.cu
│   │   │   ├── ROIPool_cuda.cu
│   │   │   ├── deform_conv_cuda.cu
│   │   │   ├── deform_conv_kernel_cuda.cu
│   │   │   ├── nms.cu
│   │   │   └── vision.h
│   │   ├── deform_conv.h
│   │   ├── nms.h
│   │   └── vision.cpp
│   ├── data
│   │   ├── __init__.py
│   │   ├── build.py
│   │   ├── collate_batch.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── coco.py
│   │   │   ├── concat_dataset.py
│   │   │   ├── evaluation
│   │   │   │   ├── __init__.py
│   │   │   │   ├── coco
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── coco_eval.py
│   │   │   │   └── voc
│   │   │   │       ├── __init__.py
│   │   │   │       └── voc_eval.py
│   │   │   ├── list_dataset.py
│   │   │   ├── panoptic_cityscapes.py
│   │   │   ├── panoptic_coco.py
│   │   │   └── voc.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   ├── distributed.py
│   │   │   ├── grouped_batch_sampler.py
│   │   │   └── iteration_based_batch_sampler.py
│   │   └── transforms
│   │       ├── __init__.py
│   │       ├── build.py
│   │       └── transforms.py
│   ├── engine
│   │   ├── __init__.py
│   │   ├── inference.py
│   │   └── trainer.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── _utils.py
│   │   ├── batch_norm.py
│   │   ├── dcn
│   │   │   ├── __init__.py
│   │   │   ├── deform_conv_func.py
│   │   │   └── deform_conv_module.py
│   │   ├── misc.py
│   │   ├── nms.py
│   │   ├── roi_align.py
│   │   ├── roi_pool.py
│   │   └── smooth_l1_loss.py
│   ├── modeling
│   │   ├── __init__.py
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── backbone.py
│   │   │   ├── fpn.py
│   │   │   └── resnet.py
│   │   ├── balanced_positive_negative_sampler.py
│   │   ├── box_coder.py
│   │   ├── detector
│   │   │   ├── __init__.py
│   │   │   ├── detectors.py
│   │   │   ├── generalized_rcnn.py
│   │   │   └── panoptic_fpn.py
│   │   ├── make_layers.py
│   │   ├── matcher.py
│   │   ├── poolers.py
│   │   ├── registry.py
│   │   ├── roi_heads
│   │   │   ├── __init__.py
│   │   │   ├── box_head
│   │   │   │   ├── __init__.py
│   │   │   │   ├── box_head.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── loss.py
│   │   │   │   ├── roi_box_feature_extractors.py
│   │   │   │   └── roi_box_predictors.py
│   │   │   ├── mask_head
│   │   │   │   ├── __init__.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── loss.py
│   │   │   │   ├── mask_head.py
│   │   │   │   ├── roi_mask_feature_extractors.py
│   │   │   │   └── roi_mask_predictors.py
│   │   │   ├── order_head
│   │   │   │   ├── __init__.py
│   │   │   │   ├── loss.py
│   │   │   │   ├── order_head.py
│   │   │   │   ├── roi_order_feature_extractors.py
│   │   │   │   └── roi_order_predictors.py
│   │   │   └── roi_heads.py
│   │   ├── rpn
│   │   │   ├── __init__.py
│   │   │   ├── anchor_generator.py
│   │   │   ├── inference.py
│   │   │   ├── loss.py
│   │   │   └── rpn.py
│   │   ├── segmenter
│   │   │   ├── __init__.py
│   │   │   ├── segmenters.py
│   │   │   └── semantic_fpn.py
│   │   ├── semantic
│   │   │   ├── __init__.py
│   │   │   ├── loss.py
│   │   │   └── segmentation.py
│   │   └── utils.py
│   ├── solver
│   │   ├── __init__.py
│   │   ├── build.py
│   │   └── lr_scheduler.py
│   ├── structures
│   │   ├── __init__.py
│   │   ├── bounding_box.py
│   │   ├── boxlist_ops.py
│   │   ├── image_list.py
│   │   ├── panoptic.py
│   │   └── segmentation_mask.py
│   └── utils
│       ├── README.md
│       ├── __init__.py
│       ├── c2_model_loading.py
│       ├── checkpoint.py
│       ├── collect_env.py
│       ├── comm.py
│       ├── cv2_util.py
│       ├── env.py
│       ├── experiment.py
│       ├── imports.py
│       ├── logger.py
│       ├── metric_logger.py
│       ├── miscellaneous.py
│       ├── model_serialization.py
│       ├── model_zoo.py
│       └── registry.py
├── setup.py
└── tools
    ├── test_net.py
    └── train_net.py

/ABSTRACTIONS.md:
--------------------------------------------------------------------------------
## Abstractions
The main abstractions introduced by `maskrcnn_benchmark` that are useful to
have in mind are the following:

### ImageList
In PyTorch, the first dimension of the input to the network generally represents
the batch dimension, and thus all elements of the same batch have the same
height / width.
In order to support images with different sizes and aspect ratios in the same
batch, we created the `ImageList` class, which holds internally a batch of
images (of possibly different sizes). The images are padded with zeros such that
they have the same final size and batched over the first dimension. The original
sizes of the images before padding are stored in the `image_sizes` attribute,
and the batched tensor in `tensors`.
We provide a convenience function `to_image_list` that accepts a few different
input types, including a list of tensors, and returns an `ImageList` object.

```python
import torch

from maskrcnn_benchmark.structures.image_list import to_image_list

images = [torch.rand(3, 100, 200), torch.rand(3, 150, 170)]
batched_images = to_image_list(images)

# it is also possible to make the final batched image be a multiple of a number
batched_images_32 = to_image_list(images, size_divisible=32)
```

### BoxList
The `BoxList` class holds a set of bounding boxes (represented as an `Nx4` tensor) for
a specific image, as well as the size of the image as a `(width, height)` tuple.
It also contains a set of methods that allow performing geometric
transformations on the bounding boxes (such as cropping, scaling and flipping).
The class accepts bounding boxes in two different input formats:
- `xyxy`, where each box is encoded by its `x1`, `y1`, `x2` and `y2` coordinates, and
- `xywh`, where each box is encoded by its `x1`, `y1`, `w` and `h`.

Additionally, each `BoxList` instance can also hold arbitrary additional information
for each bounding box, such as labels, visibility, probability scores etc.
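
A box can also be re-encoded between the two formats after construction; a minimal
sketch (`convert` is the method the upstream `maskrcnn-benchmark` `BoxList` exposes
for this):

```python
import torch

from maskrcnn_benchmark.structures.bounding_box import BoxList

# a single box given by its corners (x1, y1, x2, y2)
bbox = BoxList(torch.tensor([[10., 10., 50., 70.]]), image_size=(100, 200), mode='xyxy')

# the same box re-encoded as (x1, y1, w, h); extra fields are carried over
bbox_xywh = bbox.convert('xywh')
```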
Here is an example on how to create a `BoxList` from a list of coordinates:
```python
import torch

from maskrcnn_benchmark.structures.bounding_box import BoxList, FLIP_LEFT_RIGHT

width = 100
height = 200
boxes = [
  [0, 10, 50, 50],
  [50, 20, 90, 60],
  [10, 10, 50, 50]
]
# create a BoxList with 3 boxes
bbox = BoxList(boxes, image_size=(width, height), mode='xyxy')

# perform some box transformations, has similar API as PIL.Image
bbox_scaled = bbox.resize((width * 2, height * 3))
bbox_flipped = bbox.transpose(FLIP_LEFT_RIGHT)

# add labels for each bbox
labels = torch.tensor([0, 10, 1])
bbox.add_field('labels', labels)

# bbox also supports a few operations, like indexing
# here, selects boxes 0 and 2
bbox_subset = bbox[[0, 2]]
```

--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Code of Conduct

Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
Please read the [full text](https://code.fb.com/codeofconduct/)
so that you can understand what actions will and will not be tolerated.

--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing to Mask-RCNN Benchmark
We want to make contributing to this project as easy and transparent as
possible.

## Our Development Process
Minor changes and improvements will be released on an ongoing basis. Larger changes (e.g., changesets implementing a new paper) will be released on a more periodic basis.

## Pull Requests
We actively welcome your pull requests.

1. Fork the repo and create your branch from `master`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").

## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.

Complete your CLA here: <https://code.facebook.com/cla>

## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.

Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.

## Coding Style
* 4 spaces for indentation rather than tabs
* 80 character line length
* PEP8 formatting following [Black](https://black.readthedocs.io/en/stable/)

## License
By contributing to Mask-RCNN Benchmark, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.

--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
## Installation

### Requirements:
- PyTorch 1.0 from a nightly release. Installation instructions can be found in https://pytorch.org/get-started/locally/
- torchvision from master
- cocoapi
- yacs
- matplotlib
- GCC >= 4.9
- (optional) OpenCV for the webcam demo


### Option 1: Step-by-step installation

```bash
# first, make sure that your conda is setup properly with the right environment
# for that, check that `which conda`, `which pip` and `which python` points to the
# right path. From a clean conda env, this is what you need to do

conda create --name maskrcnn_benchmark
source activate maskrcnn_benchmark

# this installs the right pip and dependencies for the fresh python
conda install ipython

# maskrcnn_benchmark and coco api dependencies
pip install ninja yacs cython matplotlib

# follow PyTorch installation in https://pytorch.org/get-started/locally/
# we give the instructions for CUDA 9.0
conda install pytorch-nightly -c pytorch

# install torchvision
cd ~/github
git clone https://github.com/pytorch/vision.git
cd vision
python setup.py install

# install pycocotools
cd ~/github
git clone https://github.com/cocodataset/cocoapi.git
cd cocoapi/PythonAPI
python setup.py build_ext install

# install PyTorch Detection
cd ~/github
git clone https://github.com/facebookresearch/maskrcnn-benchmark.git
cd maskrcnn-benchmark
# the following will install the lib with
# symbolic links, so that you can modify
# the files if you want and won't need to
# re-build it
python setup.py build develop

# or if you are on macOS
# MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop
```

### Option 2: Docker Image (Requires CUDA, Linux only)

Build image with defaults (`CUDA=9.0`, `CUDNN=7`):

    nvidia-docker build -t maskrcnn-benchmark docker/

Build image with other CUDA and CUDNN versions:

    nvidia-docker build -t maskrcnn-benchmark --build-arg CUDA=9.2 --build-arg CUDNN=7 docker/

Build and run image with built-in jupyter notebook (note that the password is used to log in jupyter notebook):

    nvidia-docker build -t maskrcnn-benchmark-jupyter docker/docker-jupyter/
    nvidia-docker run -td -p 8888:8888 -e PASSWORD=<password> -v <host-dir>:<container-dir> maskrcnn-benchmark-jupyter
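
Once the build finishes, a quick sanity check (a sketch; it assumes the
`python setup.py build develop` step above succeeded) is to import the compiled
extension that the C++/CUDA sources under `csrc` are built into:

```python
import torch

# the custom ops (NMS, ROIAlign, ...) are compiled into maskrcnn_benchmark._C;
# an ImportError here means the build step did not complete
from maskrcnn_benchmark import _C  # noqa: F401

print(torch.__version__)          # should report a 1.0 nightly
print(torch.cuda.is_available())  # True if a CUDA device is visible to PyTorch
```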
--------------------------------------------------------------------------------
/MODEL_ZOO.md:
--------------------------------------------------------------------------------
## Model Zoo and Baselines

### Hardware
- 8 NVIDIA V100 GPUs

### Software
- PyTorch version: 1.0.0a0+dd2c487
- CUDA 9.2
- CUDNN 7.1
- NCCL 2.2.13-1

### End-to-end Faster and Mask R-CNN baselines

All the baselines were trained using the exact same experimental setup as in Detectron.
We initialize the detection models with ImageNet weights from Caffe2, the same as used by Detectron.

The pre-trained models are available via the links in the model id column.

backbone | type | lr sched | im / gpu | train mem (GB) | train time (s/iter) | total train time (hr) | inference time (s/im) | box AP | mask AP | model id
-- | -- | -- | -- | -- | -- | -- | -- | -- | -- | --
R-50-C4 | Fast | 1x | 1 | 5.8 | 0.4036 | 20.2 | 0.17130 | 34.8 | - | [6358800](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_50_C4_1x.pth)
R-50-FPN | Fast | 1x | 2 | 4.4 | 0.3530 | 8.8 | 0.12580 | 36.8 | - | [6358793](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_50_FPN_1x.pth)
R-101-FPN | Fast | 1x | 2 | 7.1 | 0.4591 | 11.5 | 0.143149 | 39.1 | - | [6358804](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_101_FPN_1x.pth)
X-101-32x8d-FPN | Fast | 1x | 1 | 7.6 | 0.7007 | 35.0 | 0.209965 | 41.2 | - | [6358717](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_X_101_32x8d_FPN_1x.pth)
R-50-C4 | Mask | 1x | 1 | 5.8 | 0.4520 | 22.6 | 0.17796 + 0.028 | 35.6 | 31.5 | [6358801](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_C4_1x.pth)
R-50-FPN | Mask | 1x | 2 | 5.2 | 0.4536 | 11.3 | 0.12966 + 0.034 | 37.8 | 34.2 | [6358792](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_FPN_1x.pth)
R-101-FPN | Mask | 1x | 2 | 7.9 | 0.5665 | 14.2 | 0.15384 + 0.034 | 40.1 | 36.1 | [6358805](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_101_FPN_1x.pth)
X-101-32x8d-FPN | Mask | 1x | 1 | 7.8 | 0.7562 | 37.8 | 0.21739 + 0.034 | 42.2 | 37.8 | [6358718](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_X_101_32x8d_FPN_1x.pth)
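
As a sketch of how one of these checkpoints can be consumed (the config path is
illustrative — this repository ships panoptic configs under `configs/` — while
`build_detection_model` and `DetectronCheckpointer` are the upstream utilities
for building a model and loading such weights):

```python
from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.modeling.detector import build_detection_model
from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer

# hypothetical config matching the R-50-FPN Mask R-CNN row of the table above
cfg.merge_from_file("configs/e2e_mask_rcnn_R_50_FPN_1x.yaml")
model = build_detection_model(cfg)

# DetectronCheckpointer resolves catalog:// and http(s) paths, caching downloads
checkpointer = DetectronCheckpointer(cfg, model, save_dir="")
checkpointer.load("https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_FPN_1x.pth")
model.eval()
```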

## Comparison with Detectron and mmdetection

In the following section, we compare our implementation with [Detectron](https://github.com/facebookresearch/Detectron)
and [mmdetection](https://github.com/open-mmlab/mmdetection).
The same remarks from [mmdetection](https://github.com/open-mmlab/mmdetection/blob/master/MODEL_ZOO.md#training-speed)
about different hardware apply here.

### Training speed

The numbers here are in seconds / iteration. The lower, the better.

type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 0.566 | - | 0.4036
Faster R-CNN R-50 FPN | 0.544 | 0.554 | 0.3530
Faster R-CNN R-101 FPN | 0.647 | - | 0.4591
Faster R-CNN X-101-32x8d FPN | 0.799 | - | 0.7007
Mask R-CNN R-50 C4 | 0.620 | - | 0.4520
Mask R-CNN R-50 FPN | 0.889 | 0.690 | 0.4536
Mask R-CNN R-101 FPN | 1.008 | - | 0.5665
Mask R-CNN X-101-32x8d FPN | 0.961 | - | 0.7562

### Training memory

The numbers here are in GB. The lower, the better.

type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 6.3 | - | 5.8
Faster R-CNN R-50 FPN | 7.2 | 4.9 | 4.4
Faster R-CNN R-101 FPN | 8.9 | - | 7.1
Faster R-CNN X-101-32x8d FPN | 7.0 | - | 7.6
Mask R-CNN R-50 C4 | 6.6 | - | 5.8
Mask R-CNN R-50 FPN | 8.6 | 5.9 | 5.2
Mask R-CNN R-101 FPN | 10.2 | - | 7.9
Mask R-CNN X-101-32x8d FPN | 7.7 | - | 7.8

### Accuracy

The numbers are COCO box AP (& mask AP, where applicable). The higher, the better.

type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 34.8 | - | 34.8
Faster R-CNN R-50 FPN | 36.7 | 36.7 | 36.8
Faster R-CNN R-101 FPN | 39.4 | - | 39.1
Faster R-CNN X-101-32x8d FPN | 41.3 | - | 41.2
Mask R-CNN R-50 C4 | 35.8 & 31.4 | - | 35.6 & 31.5
Mask R-CNN R-50 FPN | 37.7 & 33.9 | 37.5 & 34.4 | 37.8 & 34.2
Mask R-CNN R-101 FPN | 40.0 & 35.9 | - | 40.1 & 36.1
Mask R-CNN X-101-32x8d FPN | 42.1 & 37.3 | - | 42.2 & 37.8

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Learning Instance Occlusion

This is the code for the CVPR 2020 [paper](http://openaccess.thecvf.com/content_CVPR_2020/papers/Lazarow_Learning_Instance_Occlusion_for_Panoptic_Segmentation_CVPR_2020_paper.pdf) "Learning Instance Occlusion for Panoptic Segmentation".

This project is based off of the excellent [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark). We extend it to:

1. Support [Panoptic Segmentation/Panoptic FPN](https://arxiv.org/abs/1901.02446)
2. Support learning an instance-wise relationship to determine occlusion
3. Integrate this into the [existing greedy merging heuristic](https://arxiv.org/abs/1801.00868) (sketched below)
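
A schematic sketch of how an occlusion query slots into that heuristic
(illustrative only: the function, parameter names and thresholds here are made
up, and `occlusion_score` stands in for the learned occlusion head; the actual
implementation lives in the panoptic fusion code of this repository):

```python
import numpy as np

def merge_instances(masks, scores, occlusion_score, overlap_threshold=0.5):
    """Greedily paste instance masks onto a canvas, highest confidence first.

    masks: (N, H, W) boolean array of predicted instance masks.
    scores: (N,) detection confidences.
    occlusion_score(i, j): assumed to return the probability that instance i
    occludes (lies on top of) instance j; in the paper this query is answered
    by the learned occlusion head.
    """
    order = np.argsort(scores)[::-1]
    canvas = np.full(masks.shape[1:], -1, dtype=np.int64)  # -1 means unclaimed
    for i in order:
        mask = masks[i].astype(bool)
        contested = mask & (canvas != -1)
        take = mask & (canvas == -1)
        # occlusion-aware step: pixels already claimed by instance j are taken
        # back when the occlusion head believes i actually sits on top of j
        for j in np.unique(canvas[contested]):
            if occlusion_score(i, j) > 0.5:
                take |= contested & (canvas == j)
        # baseline heuristic: skip instances whose visible fraction is too small
        if take.sum() < (1.0 - overlap_threshold) * mask.sum():
            continue
        canvas[take] = i
    return canvas
```

With `occlusion_score = lambda i, j: 0.0`, this reduces to the purely
confidence-ordered merging of the original heuristic; the learned head is what
lets a lower-scoring but occluding instance win contested pixels.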

If you make use of the ideas or code in this project, please consider citing:

```
@InProceedings{Lazarow_2020_CVPR,
  author = {Lazarow, Justin and Lee, Kwonjoon and Shi, Kunyu and Tu, Zhuowen},
  title = {Learning Instance Occlusion for Panoptic Segmentation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = {June},
  year = {2020}
}
```

--------------------------------------------------------------------------------
/TROUBLESHOOTING.md:
--------------------------------------------------------------------------------
# Troubleshooting

Here is a compilation of common issues that you might face
while compiling / running this code:

## Compilation errors when compiling the library
If you encounter build errors like the following:
```
/usr/include/c++/6/type_traits:1558:8: note: provided for ‘template struct std::is_convertible’
struct is_convertible
^~~~~~~~~~~~~~
/usr/include/c++/6/tuple:502:1: error: body of constexpr function ‘static constexpr bool std::_TC<, _Elements>::_NonNestedTuple() [with _SrcTuple = std::tuple&&; bool = true; _Elements = {at::Tensor, at::Tensor, at::Tensor, at::Tensor}]’ not a return-statement
}
^
error: command '/usr/local/cuda/bin/nvcc' failed with exit status 1
```
check your CUDA version and your `gcc` version.
```
nvcc --version
gcc --version
```
If you are using CUDA 9.0 and gcc 6.4.0, then refer to https://github.com/facebookresearch/maskrcnn-benchmark/issues/25,
which has a summary of the solution. Basically, CUDA 9.0 is not compatible with gcc 6.4.0.

## ImportError: No module named maskrcnn_benchmark.config when running webcam.py

This means that `maskrcnn-benchmark` has not been properly installed.
Refer to https://github.com/facebookresearch/maskrcnn-benchmark/issues/22 for a few possible issues.
Note that we now support Python 2 as well.

## Segmentation fault (core dumped) when running the library
This probably means that you have compiled the library using GCC < 4.9, which is ABI incompatible with PyTorch.
Indeed, during installation, you probably saw a message like
```
Your compiler (g++ 4.8) may be ABI-incompatible with PyTorch!
Please use a compiler that is ABI-compatible with GCC 4.9 and above.
See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html.

See https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
for instructions on how to install GCC 4.9 or higher.
```
Follow the instructions on https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
to install GCC 4.9 or higher, and try recompiling `maskrcnn-benchmark` again, after cleaning the
`build` folder with
```
rm -rf build
```

--------------------------------------------------------------------------------
/configs/cityscapes/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
1 | TASK: 2 | KIND: "Panoptic" 3 | INPUT: 4 | MIN_SIZE_TRAIN: (800, 816, 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008, 1024) 5 | MAX_SIZE_TRAIN: 2048 6 | MIN_SIZE_TEST: 1024 7 | MAX_SIZE_TEST: 2048 8 | MODEL: 9 | META_ARCHITECTURE: "PanopticFPN" 10 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 11 | BACKBONE: 12 | CONV_BODY: "R-50-FPN" 13 | OUT_CHANNELS: 256 14 | RPN: 15 | USE_FPN: True 16 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 17 | PRE_NMS_TOP_N_TRAIN: 2000 18 | PRE_NMS_TOP_N_TEST: 1000 19 | POST_NMS_TOP_N_TEST: 1000 20 | FPN_POST_NMS_TOP_N_TEST: 1000 21 | USE_SEMANTIC_FEATURES: False 22 | RPN_HEAD: "SingleConvRPNHead" 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | NUM_CLASSES: 9 32 | ROI_MASK_HEAD: 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 35 | PREDICTOR: "MaskRCNNC4Predictor" 36 | POOLER_RESOLUTION: 14 37 | POOLER_SAMPLING_RATIO: 2 38 | RESOLUTION: 28 39 | SHARE_BOX_FEATURE_EXTRACTOR: False 40 | ROI_ORDER_HEAD: 41 | BATCH_SIZE_PER_IMAGE: 128 42 | SHARE_MASK_FEATURE_EXTRACTOR: False 43 | ENSURE_CONSISTENCY: True 44 | WEIGHT: 1.0 45 | SEMANTIC: 46 | USE_FPN: True 47 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 48 | UPSAMPLE_MODULE: "One3x3ReLU" 49 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 50 | COLLAPSE_THING_ONTOLOGY: True 51 | UPSAMPLE_METHOD: "bilinear" 52 | NUM_CLASSES: 11 53 | PANOPTIC: 54 | INSTANCE_WEIGHT: 1.0 55 | SEMANTIC_WEIGHT: 1.0 56 | COMPUTE_CC_RESULTS: False 57 | COMPUTE_PRE_RESULTS: False 58 | MASK_ON: True 59 | ORDER_ON: False 60 | WEIGHT: "/local/experiments/panoptic/cityscapes/05162019_155406/model_0020000.pth" # "/local/experiments/panoptic/cityscapes/06162019_162039/model_final.pth" #"/local/experiments/panoptic/cityscapes/05162019_155406/model_0027500.pth" #"/local/experiments/panoptic/cityscapes/05162019_155406/model_0017500.pth" #"/local/experiments/03222019_005905_model_0020000.pth" #"/local/experiments/03222019_005905_model_0017500.pth" #"/local/experiments/panoptic/cityscapes/03182019_141908/model_final.pth" #"/local/experiments/03212019_225953_model_final.pth" #"/local/experiments/03212019_004629_model_0012500.pth" #"/local/experiments/panoptic/cityscapes/03212019_190229/model_final.pth" #"/local/experiments/panoptic/cityscapes/03182019_141908/model_final.pth" #"/local/experiments/03212019_004629_model_0010000.pth" #panoptic/cityscapes/03212019_161835/model_final.pth" # #"/local/experiments/panoptic/cityscapes/03212019_141521/model_final.pth" 61 | #"/local/experiments/panoptic/cityscapes/03182019_141908/model_final.pth" ##"/local/experiments/panoptic/cityscapes/03202019_183257/model_final.pth"#"/local/experiments/panoptic/cityscapes/03212019_000352/model_final.pth" # 62 | #"/local/experiments/panoptic/cityscapes/03202019_135406/model_final.pth" ## 63 | DATASETS: 64 | TRAIN: 
("panoptic_cityscapes_fine_instanceonly_seg_train_cocostyle",) 65 | TEST: ("panoptic_cityscapes_fine_instanceonly_seg_tiny_cocostyle",) 66 | DATALOADER: 67 | SIZE_DIVISIBILITY: 32 68 | SOLVER: 69 | IMS_PER_BATCH: 4 70 | BASE_LR: 0.02 71 | WEIGHT_DECAY: 0.0001 72 | STEPS: (9000,) 73 | MAX_ITER: 12000 74 | RESUME_ITER: 8750 75 | OVERRIDE_OPTIMIZER: True 76 | TEST: 77 | IMS_PER_BATCH: 2 78 | PREDICTION_PATHS: [] #"/local/experiments/panoptic/cityscapes/06162019_162039/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"]#["/local/experiments/panoptic/cityscapes/03212019_190229/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] #["/local/experiments/panoptic/cityscapes/03212019_161835/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] #["/local/experiments/panoptic/cityscapes/03212019_141521/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] #["/local/experiments/panoptic/cityscapes/03212019_000352/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] 79 | TEMPORARY_DIR: "/local/tmp" 80 | NAME: "cityscapes" 81 | DESCRIPTION: "collapsed, fine-tuning order head" 82 | -------------------------------------------------------------------------------- /configs/cityscapes/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle_order.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | INPUT: 4 | MIN_SIZE_TRAIN: (800, 816, 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008, 1024) 5 | MAX_SIZE_TRAIN: 2048 6 | MIN_SIZE_TEST: 1024 7 | MAX_SIZE_TEST: 2048 8 | MODEL: 9 | META_ARCHITECTURE: "PanopticFPN" 10 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 11 | BACKBONE: 12 | CONV_BODY: "R-50-FPN" 13 | OUT_CHANNELS: 256 14 | RPN: 15 | USE_FPN: True 16 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 17 | PRE_NMS_TOP_N_TRAIN: 2000 18 | PRE_NMS_TOP_N_TEST: 1000 19 | POST_NMS_TOP_N_TEST: 1000 20 | FPN_POST_NMS_TOP_N_TEST: 1000 21 | USE_SEMANTIC_FEATURES: False 22 | RPN_HEAD: "SingleConvRPNHead" 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | NUM_CLASSES: 9 32 | ROI_MASK_HEAD: 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 35 | PREDICTOR: "MaskRCNNC4Predictor" 36 | POOLER_RESOLUTION: 14 37 | POOLER_SAMPLING_RATIO: 2 38 | RESOLUTION: 28 39 | SHARE_BOX_FEATURE_EXTRACTOR: False 40 | ROI_ORDER_HEAD: 41 | OVERLAP_THRESHOLD: 0.05 42 | BATCH_SIZE_PER_IMAGE: 512 43 | SHARE_MASK_FEATURE_EXTRACTOR: False 44 | ENSURE_CONSISTENCY: False 45 | WEIGHT: 1.0 46 | ONLY_TRAIN: True 47 | SEMANTIC: 48 | USE_FPN: True 49 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 50 | UPSAMPLE_MODULE: "One3x3ReLU" 51 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 52 | COLLAPSE_THING_ONTOLOGY: True 53 | UPSAMPLE_METHOD: "bilinear" 54 | NUM_CLASSES: 11 55 | PANOPTIC: 56 | INSTANCE_WEIGHT: 1.0 57 | SEMANTIC_WEIGHT: 1.0 58 | COMPUTE_CC_RESULTS: False 59 | COMPUTE_PRE_RESULTS: True 60 | MASK_ON: True 61 | ORDER_ON: True 62 | WEIGHT: "/local/experiments/panoptic/cityscapes/06172019_233221/model_final.pth" #"/local/experiments/panoptic/cityscapes/05162019_155406/model_final.pth" #"/local/experiments/panoptic/cityscapes/06172019_190920/model_final.pth" 
#"/local/experiments/panoptic/cityscapes/05162019_155406/model_final.pth" #"/local/experiments/panoptic/cityscapes/05242019_172548/model_final.pth" #"/local/experiments/panoptic/cityscapes/05162019_155406/model_0032500.pth" # "/local/experiments/panoptic/cityscapes/05242019_172548/model_final.pth" # "/local/experiments/panoptic/cityscapes/06032019_154759/model_final.pth" # "/local/experiments/panoptic/cityscapes/06112019_225624/model_final.pth" 63 | DATASETS: 64 | TRAIN: ("panoptic_cityscapes_fine_instanceonly_seg_train_cocostyle_overlap005",) 65 | TEST: ("panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle",) #("panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle",) #_overlap005 66 | DATALOADER: 67 | SIZE_DIVISIBILITY: 32 68 | SOLVER: 69 | IMS_PER_BATCH: 8 70 | # 0.02 at 16 GPUs. 71 | BASE_LR: 0.02 72 | WEIGHT_DECAY: 0.0001 73 | STEPS: (14000,) # this is wrong. 74 | MAX_ITER: 19000 75 | # RESUME_ITER: 16250 76 | TEST: 77 | IMS_PER_BATCH: 8 78 | INTRACLASS_OCCLUSION: True 79 | #ORDER_ONLY: True 80 | PREDICTION_PATHS: [] #["/local/experiments/panoptic/cityscapes/06042019_131251/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] 81 | TEMPORARY_DIR: "/tmp" 82 | NAME: "cityscapes" 83 | DESCRIPTION: "training order head, training new features, fixed solver I hope" 84 | -------------------------------------------------------------------------------- /configs/cityscapes/panoptic_8gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | INPUT: 4 | MIN_SIZE_TRAIN: (800, 816, 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008, 1024) 5 | MAX_SIZE_TRAIN: 2048 6 | MIN_SIZE_TEST: 1024 7 | MAX_SIZE_TEST: 2048 8 | MODEL: 9 | META_ARCHITECTURE: "PanopticFPN" 10 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 11 | BACKBONE: 12 | CONV_BODY: "R-50-FPN" 13 | OUT_CHANNELS: 256 14 | RPN: 15 | USE_FPN: True 16 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 17 | PRE_NMS_TOP_N_TRAIN: 2000 18 | PRE_NMS_TOP_N_TEST: 1000 19 | POST_NMS_TOP_N_TEST: 1000 20 | FPN_POST_NMS_TOP_N_TEST: 1000 21 | USE_SEMANTIC_FEATURES: False 22 | RPN_HEAD: "SingleConvRPNHead" 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | NUM_CLASSES: 9 32 | ROI_MASK_HEAD: 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 35 | PREDICTOR: "MaskRCNNC4Predictor" 36 | POOLER_RESOLUTION: 14 37 | POOLER_SAMPLING_RATIO: 2 38 | RESOLUTION: 28 39 | SHARE_BOX_FEATURE_EXTRACTOR: False 40 | ROI_ORDER_HEAD: 41 | BATCH_SIZE_PER_IMAGE: 128 42 | SHARE_MASK_FEATURE_EXTRACTOR: False 43 | ENSURE_CONSISTENCY: True 44 | WEIGHT: 1.0 45 | SEMANTIC: 46 | USE_FPN: True 47 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 48 | UPSAMPLE_MODULE: "One3x3ReLU" 49 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 50 | COLLAPSE_THING_ONTOLOGY: True 51 | UPSAMPLE_METHOD: "bilinear" 52 | NUM_CLASSES: 11 53 | PANOPTIC: 54 | INSTANCE_WEIGHT: 1.0 55 | SEMANTIC_WEIGHT: 1.0 56 | COMPUTE_CC_RESULTS: False 57 | COMPUTE_PRE_RESULTS: True 58 | FUSION: 59 | CONFIDENCE_THRESHOLD: 0.6 60 | OVERLAP_THRESHOLD: 0.5 61 | STUFF_MINIMUM_AREA: 2048 62 | MASK_ON: True 63 | WEIGHT: "/local/experiments/panoptic/cityscapes/05162019_155406/model_final.pth" 64 | DATASETS: 65 | TRAIN: 
("panoptic_cityscapes_fine_instanceonly_seg_train_cocostyle",) 66 | TEST: ("panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle",) 67 | DATALOADER: 68 | SIZE_DIVISIBILITY: 32 69 | SOLVER: 70 | IMS_PER_BATCH: 8 71 | BASE_LR: 0.01 72 | WEIGHT_DECAY: 0.0001 73 | STEPS: (24000,) 74 | MAX_ITER: 36000 75 | TEST: 76 | IMS_PER_BATCH: 8 77 | PREDICTION_PATHS: ["/local/experiments/panoptic/cityscapes/05242019_133904/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] 78 | TEMPORARY_DIR: "/tmp" 79 | NAME: "cityscapes" 80 | DESCRIPTION: "renewing this effort" 81 | -------------------------------------------------------------------------------- /configs/cityscapes/panoptic_8gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle_dc.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | INPUT: 4 | MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) 5 | MAX_SIZE_TRAIN: 2048 6 | MIN_SIZE_TEST: 1024 7 | MAX_SIZE_TEST: 2048 8 | MODEL: 9 | META_ARCHITECTURE: "PanopticFPN" 10 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 11 | BACKBONE: 12 | CONV_BODY: "R-50-FPN" 13 | OUT_CHANNELS: 256 14 | RPN: 15 | USE_FPN: True 16 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 17 | PRE_NMS_TOP_N_TRAIN: 2000 18 | PRE_NMS_TOP_N_TEST: 1000 19 | POST_NMS_TOP_N_TEST: 1000 20 | FPN_POST_NMS_TOP_N_TEST: 1000 21 | USE_SEMANTIC_FEATURES: False 22 | RPN_HEAD: "SingleConvRPNHead" 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | NUM_CLASSES: 9 32 | ROI_MASK_HEAD: 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 35 | PREDICTOR: "MaskRCNNC4Predictor" 36 | POOLER_RESOLUTION: 14 37 | POOLER_SAMPLING_RATIO: 2 38 | RESOLUTION: 28 39 | SHARE_BOX_FEATURE_EXTRACTOR: False 40 | ROI_ORDER_HEAD: 41 | BATCH_SIZE_PER_IMAGE: 128 42 | SHARE_MASK_FEATURE_EXTRACTOR: False 43 | ENSURE_CONSISTENCY: True 44 | WEIGHT: 1.0 45 | SEMANTIC: 46 | USE_FPN: True 47 | USE_DC: True 48 | SEMANTIC_HEAD: "UpsampleEqualSizeConvConcat" 49 | UPSAMPLE_MODULE: "StraightDeconv" 50 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 51 | COLLAPSE_THING_ONTOLOGY: False 52 | UPSAMPLE_METHOD: "bilinear" 53 | NUM_CLASSES: 11 54 | PANOPTIC: 55 | INSTANCE_WEIGHT: 1.0 56 | SEMANTIC_WEIGHT: 1.0 57 | COMPUTE_CC_RESULTS: False 58 | COMPUTE_PRE_RESULTS: True 59 | FUSION: 60 | CONFIDENCE_THRESHOLD: 0.6 61 | OVERLAP_THRESHOLD: 0.5 62 | STUFF_MINIMUM_AREA: 2048 63 | MASK_ON: True 64 | WEIGHT: "/local/experiments/panoptic/cityscapes/06202019_120811/model_final.pth" 65 | DATASETS: 66 | TRAIN: ("panoptic_cityscapes_fine_instanceonly_seg_train_cocostyle",) 67 | TEST: ("panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle",) 68 | DATALOADER: 69 | SIZE_DIVISIBILITY: 32 70 | SOLVER: 71 | IMS_PER_BATCH: 8 72 | BASE_LR: 0.02 73 | WEIGHT_DECAY: 0.0001 74 | STEPS: (9000,) 75 | MAX_ITER: 12000 76 | TEST: 77 | IMS_PER_BATCH: 8 78 | PREDICTION_PATHS: [] 79 | TEMPORARY_DIR: "/local/tmp" 80 | NAME: "cityscapes" 81 | DESCRIPTION: "hopefully fixed the solver" 82 | -------------------------------------------------------------------------------- /configs/cityscapes/panoptic_8gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle_dc_order.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | INPUT: 4 | MIN_SIZE_TRAIN: (800, 
832, 864, 896, 928, 960, 992, 1024) 5 | MAX_SIZE_TRAIN: 2048 6 | MIN_SIZE_TEST: 1024 7 | MAX_SIZE_TEST: 2048 8 | MODEL: 9 | META_ARCHITECTURE: "PanopticFPN" 10 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 11 | BACKBONE: 12 | CONV_BODY: "R-50-FPN" 13 | OUT_CHANNELS: 256 14 | RPN: 15 | USE_FPN: True 16 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 17 | PRE_NMS_TOP_N_TRAIN: 2000 18 | PRE_NMS_TOP_N_TEST: 1000 19 | POST_NMS_TOP_N_TEST: 1000 20 | FPN_POST_NMS_TOP_N_TEST: 1000 21 | USE_SEMANTIC_FEATURES: False 22 | RPN_HEAD: "SingleConvRPNHead" 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | NUM_CLASSES: 9 32 | ROI_MASK_HEAD: 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 35 | PREDICTOR: "MaskRCNNC4Predictor" 36 | POOLER_RESOLUTION: 14 37 | POOLER_SAMPLING_RATIO: 2 38 | RESOLUTION: 28 39 | SHARE_BOX_FEATURE_EXTRACTOR: False 40 | ROI_ORDER_HEAD: 41 | OVERLAP_THRESHOLD: 0.05 42 | BATCH_SIZE_PER_IMAGE: 512 43 | SHARE_MASK_FEATURE_EXTRACTOR: False 44 | ENSURE_CONSISTENCY: False 45 | WEIGHT: 1.0 46 | ONLY_TRAIN: True 47 | SEMANTIC: 48 | USE_FPN: True 49 | USE_DC: True 50 | SEMANTIC_HEAD: "UpsampleEqualSizeConvConcat" 51 | UPSAMPLE_MODULE: "StraightDeconv" 52 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 53 | COLLAPSE_THING_ONTOLOGY: False 54 | UPSAMPLE_METHOD: "bilinear" 55 | NUM_CLASSES: 11 56 | PANOPTIC: 57 | INSTANCE_WEIGHT: 1.0 58 | SEMANTIC_WEIGHT: 1.0 59 | COMPUTE_CC_RESULTS: False 60 | COMPUTE_PRE_RESULTS: True 61 | FUSION: 62 | CONFIDENCE_THRESHOLD: 0.6 63 | OVERLAP_THRESHOLD: 0.4 64 | STUFF_MINIMUM_AREA: 2048 65 | MASK_ON: True 66 | ORDER_ON: True 67 | WEIGHT: "/local/experiments/panoptic/cityscapes/06202019_120811/model_final.pth" # "/local/experiments/panoptic/cityscapes/06232019_173719/model_final.pth" # 68 | DATASETS: 69 | TRAIN: ("panoptic_cityscapes_fine_instanceonly_seg_train_cocostyle_overlap005",) 70 | TEST: ("panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle",) 71 | DATALOADER: 72 | SIZE_DIVISIBILITY: 32 73 | SOLVER: 74 | IMS_PER_BATCH: 8 75 | BASE_LR: 0.02 76 | WEIGHT_DECAY: 0.0001 77 | STEPS: (9000,) 78 | MAX_ITER: 12500 79 | TEST: 80 | IMS_PER_BATCH: 8 81 | INTRACLASS_OCCLUSION: True 82 | PREDICTION_PATHS: ["/local/experiments/panoptic/cityscapes/06232019_221633/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] #["/local/experiments/panoptic/cityscapes/06232019_230716/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] #["/local/experiments/panoptic/cityscapes/06242019_085723/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] #["/local/experiments/panoptic/cityscapes/06232019_230716/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] ["/local/experiments/panoptic/cityscapes/06242019_214756/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] 83 | TEMPORARY_DIR: "/local/tmp" 84 | NAME: "cityscapes" 85 | DESCRIPTION: "hopefully fixed the solver, trying at 005" 86 | -------------------------------------------------------------------------------- /configs/cityscapes/panoptic_e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: 
"PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | NUM_CLASSES: 9 27 | ROI_MASK_HEAD: 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | SEMANTIC: 36 | USE_FPN: True 37 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 38 | UPSAMPLE_MODULE: "One3x3ReLU" 39 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 40 | COLLAPSE_THING_ONTOLOGY: False 41 | UPSAMPLE_METHOD: "bilinear" 42 | NUM_CLASSES: 11 43 | PANOPTIC: 44 | INSTANCE_WEIGHT: 1.0 45 | SEMANTIC_WEIGHT: 1.0 46 | COMPUTE_CC_RESULTS: False 47 | COMPUTE_PRE_RESULTS: False 48 | MASK_ON: True 49 | DATASETS: 50 | TRAIN: ("panoptic_cityscapes_fine_instanceonly_seg_train_cocostyle",) 51 | TEST: ("panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle",) 52 | DATALOADER: 53 | SIZE_DIVISIBILITY: 32 54 | SOLVER: 55 | BASE_LR: 0.01 56 | WEIGHT_DECAY: 0.0001 57 | STEPS: (18000,) 58 | MAX_ITER: 24000 59 | TEST: 60 | PREDICTION_PATHS: [] #["/scratch/jlazarow/experiments/panoptic/cityscapes/03152019_164312/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] 61 | TEMPORARY_DIR: "/scratch/tmp" 62 | NAME: "cityscapes" 63 | DESCRIPTION: "dense ontology, finetuning from COCO" 64 | -------------------------------------------------------------------------------- /configs/panoptic_2gpu_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: True 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleConvSumCombineScales" 37 | CONV_HEAD_DIM: 128 38 | UPSAMPLE_MODULE: "One3x3ReLU" 39 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 40 | COLLAPSE_THING_ONTOLOGY: False 41 | UPSAMPLE_METHOD: "bilinear" 42 | PANOPTIC: 43 | INSTANCE_WEIGHT: 1.0 44 | SEMANTIC_WEIGHT: 0.5 45 | MASK_ON: True 46 | DATASETS: 47 | TRAIN: 
("panoptic_coco_2017_train",) 48 | TEST: ("panoptic_coco_2017_val",) 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | IMS_PER_BATCH: 4 53 | BASE_LR: 0.005 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (240000, 320000) 56 | MAX_ITER: 360000 57 | TEST: 58 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 59 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 60 | PREDICTION_PATHS: [] 61 | TEMPORARY_DIR: "/scratch/tmp" 62 | NAME: "prime_rpn" 63 | DESCRIPTION: "priming the RPN with residual semantic features 2GPU" 64 | -------------------------------------------------------------------------------- /configs/panoptic_2gpu_mask_rcnn_R_50_FPN_1x_test_dev.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: False 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | COMPUTE_CC_RESULTS: False 45 | MASK_ON: True 46 | ORDER_ON: True 47 | WEIGHT: "/mnt/cube/jlazarow/experiments/panoptic/baseline/03062019_232440/model_final.pth" 48 | DATASETS: 49 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 50 | TEST: ("panoptic_coco_2017_test_dev",) 51 | DATALOADER: 52 | SIZE_DIVISIBILITY: 32 53 | SOLVER: 54 | BASE_LR: 0.02 55 | WEIGHT_DECAY: 0.0001 56 | STEPS: (60000, 80000) 57 | MAX_ITER: 90000 58 | TEST: 59 | CHUNK_PREDICTIONS: True 60 | PREDICTION_PATHS: [] 61 | IMS_PER_BATCH: 16 62 | TEMPORARY_DIR: "/local/tmp" 63 | NAME: "order" 64 | DESCRIPTION: "dense ontology" 65 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 
0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" # "UpsampleConvSumCombineScales" # 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: False 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | MASK_ON: True 45 | DATASETS: 46 | TRAIN: ("panoptic_coco_2017_train",) 47 | TEST: ("panoptic_coco_2017_val",) 48 | DATALOADER: 49 | SIZE_DIVISIBILITY: 32 50 | SOLVER: 51 | IMS_PER_BATCH: 8 52 | BASE_LR: 0.01 53 | WEIGHT_DECAY: 0.0001 54 | STEPS: (120000, 160000) 55 | MAX_ITER: 180000 56 | TEMPORARY_DIR: "/media/data/jlazarow" 57 | NAME: "baseline" 58 | DESCRIPTION: "dense ontology" 59 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_fusion.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4WithScorePredictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: False 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | MASK_ON: True 45 | FUSION_ON: True 46 | DATASETS: 47 | TRAIN: ("panoptic_coco_2017_train",) 48 | TEST: ("panoptic_coco_2017_val",) 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | IMS_PER_BATCH: 8 53 | BASE_LR: 0.01 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (120000, 160000) 56 | MAX_ITER: 180000 57 | TEMPORARY_DIR: "/media/data/jlazarow" 58 | NAME: "fusion" 59 | DESCRIPTION: "dense ontology, fusion e2e" 60 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_order.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 
64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | ROI_ORDER_HEAD: 35 | OVERLAP_THRESHOLD: 0.10 36 | BATCH_SIZE_PER_IMAGE: 256 37 | SHARE_MASK_FEATURE_EXTRACTOR: False 38 | ENSURE_CONSISTENCY: True 39 | #ONLY_TRAIN: True 40 | WEIGHT: 1.0 41 | SEMANTIC: 42 | USE_FPN: True 43 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 44 | UPSAMPLE_MODULE: "One3x3ReLU" 45 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 46 | COLLAPSE_THING_ONTOLOGY: False 47 | UPSAMPLE_METHOD: "bilinear" 48 | PANOPTIC: 49 | INSTANCE_WEIGHT: 1.0 50 | SEMANTIC_WEIGHT: 0.5 51 | FUSION: 52 | CONFIDENCE_THRESHOLD: 0.5 53 | OVERLAP_THRESHOLD: 0.5 54 | STUFF_MINIMUM_AREA: 4096 55 | MASK_ON: True 56 | ORDER_ON: False 57 | WEIGHT: "/local/experiments/panoptic/order/06262019_200346/model_final.pth" #"/mnt/cube/jlazarow/experiments/panoptic/baseline/02112019_151538/model_final.pth" # "/local/experiments/panoptic/order/06222019_165802/model_final.pth" "/local/experiments/panoptic/order/06232019_143107/model_final.pth" # "/local/experiments/panoptic/order/06262019_111528/model_final.pth" # 58 | DATASETS: 59 | TRAIN: ("panoptic_coco_2017_train",) 60 | TEST: ("panoptic_coco_2017_val",) 61 | DATALOADER: 62 | SIZE_DIVISIBILITY: 32 63 | SOLVER: 64 | IMS_PER_BATCH: 4 65 | BASE_IMS_PER_BATCH: 8 66 | BASE_LR: 0.01 67 | WEIGHT_DECAY: 0.0001 68 | STEPS: (60000, 120000) 69 | MAX_ITER: 91000 70 | TEST: 71 | PREDICTION_PATHS: ["/local/experiments/panoptic/order/06262019_205807/inference/panoptic_coco_2017_val/predictions.pth"] #"/local/experiments/panoptic/order/06262019_132748/inference/panoptic_coco_2017_val/predictions.pth"] #"/local/experiments/panoptic/order/06252019_143058/inference/panoptic_coco_2017_val/predictions.pth"] 72 | TEMPORARY_DIR: "/local/tmp/" 73 | NAME: "order" 74 | DESCRIPTION: "trying to fix everything but order head" 75 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_order_nodense.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | 
PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: True 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | MASK_ON: True 45 | ORDER_ON: True 46 | DATASETS: 47 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 48 | TEST: ("panoptic_coco_2017_val",) 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | IMS_PER_BATCH: 8 53 | BASE_LR: 0.01 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (120000, 160000) 56 | MAX_ITER: 180000 57 | TEMPORARY_DIR: "/media/data/jlazarow" 58 | NAME: "order" 59 | DESCRIPTION: "dense ontology, order e2e" 60 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: True 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleConvSumCombineScales" 37 | CONV_HEAD_DIM: 128 38 | UPSAMPLE_MODULE: "One3x3ReLU" 39 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 40 | COLLAPSE_THING_ONTOLOGY: False 41 | UPSAMPLE_METHOD: "bilinear" 42 | PANOPTIC: 43 | INSTANCE_WEIGHT: 1.0 44 | SEMANTIC_WEIGHT: 0.5 45 | MASK_ON: True 46 | DATASETS: 47 | TRAIN: ("panoptic_coco_2017_train",) 48 | TEST: ("panoptic_coco_2017_val",) 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | IMS_PER_BATCH: 8 53 | BASE_LR: 0.01 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (120000, 160000) 56 | MAX_ITER: 180000 57 | TEST: 58 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 59 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 60 | PREDICTION_PATHS: [] 61 | TEMPORARY_DIR: "/scratch/tmp" 62 | NAME: "prime_rpn" 63 | DESCRIPTION: "priming the RPN with residual semantic features 2GPU" 64 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_mask_rcnn_R_101_FPN_1x_test_dev.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | 
USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | ROI_ORDER_HEAD: 35 | BATCH_SIZE_PER_IMAGE: 128 36 | SHARE_MASK_FEATURE_EXTRACTOR: False 37 | ENSURE_CONSISTENCY: True 38 | WEIGHT: 1.0 39 | SEMANTIC: 40 | USE_FPN: True 41 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 42 | UPSAMPLE_MODULE: "One3x3ReLU" 43 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 44 | COLLAPSE_THING_ONTOLOGY: True 45 | UPSAMPLE_METHOD: "bilinear" 46 | PANOPTIC: 47 | INSTANCE_WEIGHT: 1.0 48 | SEMANTIC_WEIGHT: 0.5 49 | COMPUTE_CC_RESULTS: False 50 | COMPUTE_PRE_RESULTS: False 51 | MASK_ON: True 52 | ORDER_ON: True 53 | WEIGHT: "/mnt/cube/kwl042/panoptic_exps/panoptic/baseline/03092019_234338/model_0180000.pth" #03092019_234338/model_0195200.pth" #"/mnt/cube/jlazarow/experiments/panoptic/baseline/03062019_232440/model_final.pth" #/local/experiments/panoptic/order/03142019_125645/model_final.pth 54 | DATASETS: 55 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 56 | TEST: ("panoptic_coco_2017_val",) 57 | DATALOADER: 58 | SIZE_DIVISIBILITY: 32 59 | SOLVER: 60 | IMS_PER_BATCH: 8 61 | BASE_LR: 0.01 62 | WEIGHT_DECAY: 0.0001 63 | STEPS: (120000, 160000) #(60000, 80000) 64 | MAX_ITER: 185000 #90000 65 | TEST: 66 | CHUNK_PREDICTIONS: True 67 | PREDICTION_PATHS: [] 68 | IMS_PER_BATCH: 8 69 | TEMPORARY_DIR: "/local/tmp" 70 | NAME: "order" 71 | DESCRIPTION: "dense ontology, R101" 72 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_mask_rcnn_R_50_FPN_1x_test_dev.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: True 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | 
INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | COMPUTE_CC_RESULTS: False 45 | MASK_ON: True 46 | ORDER_ON: True 47 | WEIGHT: "/mnt/cube/kwl042/panoptic_exps/panoptic/baseline/03092019_234338/model_0195200.pth" #"/mnt/cube/jlazarow/experiments/panoptic/baseline/03062019_232440/model_final.pth" 48 | DATASETS: 49 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 50 | TEST: ("panoptic_coco_2017_test_dev",) 51 | DATALOADER: 52 | SIZE_DIVISIBILITY: 32 53 | SOLVER: 54 | BASE_LR: 0.02 55 | WEIGHT_DECAY: 0.0001 56 | STEPS: (60000, 80000) 57 | MAX_ITER: 90000 58 | TEST: 59 | CHUNK_PREDICTIONS: True 60 | PREDICTION_PATHS: [] 61 | IMS_PER_BATCH: 16 62 | TEMPORARY_DIR: "/local/tmp" 63 | NAME: "order" 64 | DESCRIPTION: "dense ontology" 65 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_101_FPN_1x_order.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | ROI_ORDER_HEAD: 35 | BATCH_SIZE_PER_IMAGE: 128 36 | SHARE_MASK_FEATURE_EXTRACTOR: False 37 | ENSURE_CONSISTENCY: True 38 | WEIGHT: 1.0 39 | SEMANTIC: 40 | USE_FPN: True 41 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 42 | UPSAMPLE_MODULE: "One3x3ReLU" 43 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 44 | COLLAPSE_THING_ONTOLOGY: True 45 | UPSAMPLE_METHOD: "bilinear" 46 | PANOPTIC: 47 | INSTANCE_WEIGHT: 1.0 48 | SEMANTIC_WEIGHT: 0.5 49 | COMPUTE_CC_RESULTS: False 50 | MASK_ON: True 51 | ORDER_ON: True 52 | WEIGHT: "/mnt/cube/kwl042/panoptic_exps/panoptic/baseline/03092019_234338/model_0180000.pth" 53 | DATASETS: 54 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 55 | TEST: ("panoptic_coco_2017_val",) 56 | DATALOADER: 57 | SIZE_DIVISIBILITY: 32 58 | SOLVER: 59 | IMS_PER_BATCH: 8 60 | BASE_LR: 0.02 61 | WEIGHT_DECAY: 0.0001 62 | STEPS: (60000, 80000) 63 | MAX_ITER: 90000 64 | TEST: 65 | IMS_PER_BATCH: 8 66 | PREDICTION_PATHS: [] 67 | TEMPORARY_DIR: "/local/tmp" 68 | NAME: "order" 69 | DESCRIPTION: "collapsed ontology, R101, order own features" 70 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | 
POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | ROI_MASK_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 27 | PREDICTOR: "MaskRCNNC4Predictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 28 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | SEMANTIC: 33 | USE_FPN: True 34 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 35 | UPSAMPLE_MODULE: "One3x3ReLU" 36 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 37 | COLLAPSE_THING_ONTOLOGY: False 38 | UPSAMPLE_METHOD: "bilinear" 39 | PANOPTIC: 40 | INSTANCE_WEIGHT: 1.0 41 | SEMANTIC_WEIGHT: 0.5 42 | MASK_ON: True 43 | DATASETS: 44 | TRAIN: ("panoptic_coco_2017_train",) 45 | TEST: ("panoptic_coco_2017_val",) 46 | DATALOADER: 47 | SIZE_DIVISIBILITY: 32 48 | SOLVER: 49 | BASE_LR: 0.02 50 | WEIGHT_DECAY: 0.0001 51 | STEPS: (60000, 80000) 52 | MAX_ITER: 90000 53 | TEST: 54 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 55 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 56 | PREDICTION_PATHS: [] #"/mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth"] 57 | TEMPORARY_DIR: "/media/data/jlazarow" 58 | NAME: "baseline" 59 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_fusion.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4WithScorePredictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: False 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | MASK_ON: True 45 | FUSION_ON: True 46 | DATASETS: 47 | TRAIN: ("panoptic_coco_2017_train",) 48 | TEST: ("panoptic_coco_2017_tiny",) 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | BASE_LR: 0.02 53 | WEIGHT_DECAY: 0.0001 54 | STEPS: (60000, 80000) 55 | MAX_ITER: 90000 56 | TEST: 57 | PREDICTION_PATHS: [] 58 | TEMPORARY_DIR: "/scratch/tmp" 59 | NAME: "fusion" 60 | DESCRIPTION: "dense ontology, fine tuning" 61 | 
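These YAML files are deltas over the defaults in maskrcnn_benchmark/config/defaults.py; only the keys they list are overridden. A minimal sketch of how a config like the one above is consumed, assuming the stock maskrcnn-benchmark entry points (script and helper names may differ slightly in this fork):

from maskrcnn_benchmark.config import cfg  # yacs node assembled in config/defaults.py
from maskrcnn_benchmark.modeling.detector import build_detection_model

cfg.merge_from_file("configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_fusion.yaml")
cfg.merge_from_list(["SOLVER.IMS_PER_BATCH", 4])  # command-line style overrides
cfg.freeze()
model = build_detection_model(cfg)  # instantiates the "PanopticFPN" meta-architecture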
-------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_order.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | USE_SEMANTIC_FEATURES: True 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | ROI_MASK_HEAD: 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | ROI_ORDER_HEAD: 36 | BATCH_SIZE_PER_IMAGE: 128 37 | SHARE_MASK_FEATURE_EXTRACTOR: False 38 | ENSURE_CONSISTENCY: True 39 | WEIGHT: 1.0 40 | SEMANTIC: 41 | USE_FPN: True 42 | SEMANTIC_HEAD: "UpsampleConvSumCombineScales" #"UpsampleEqualSizeConvSum" 43 | UPSAMPLE_MODULE: "One3x3ReLU" 44 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 45 | COLLAPSE_THING_ONTOLOGY: False 46 | UPSAMPLE_METHOD: "bilinear" 47 | PANOPTIC: 48 | INSTANCE_WEIGHT: 1.0 49 | SEMANTIC_WEIGHT: 0.5 50 | COMPUTE_CC_RESULTS: False 51 | COMPUTE_PRE_RESULTS: False 52 | MASK_ON: True 53 | ORDER_ON: True 54 | DATASETS: 55 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 56 | TEST: ("panoptic_coco_2017_val",) 57 | DATALOADER: 58 | SIZE_DIVISIBILITY: 32 59 | SOLVER: 60 | BASE_LR: 0.02 61 | WEIGHT_DECAY: 0.0001 62 | STEPS: (60000, 80000) 63 | MAX_ITER: 90000 64 | TEST: 65 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 66 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 67 | PREDICTION_PATHS: [] #["/mnt/cube/jlazarow/experiments/panoptic/baseline/03072019_184209/inference/panoptic_coco_2017_val/predictions.pth"] 68 | TEMPORARY_DIR: "/local/tmp" 69 | NAME: "baseline" 70 | DESCRIPTION: "dense ontology, trying 0.6 confidence." 
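A pattern worth noting across these solver blocks: the lower-GPU variants follow the linear scaling rule, halving IMS_PER_BATCH from the 16-image default while halving BASE_LR and doubling STEPS/MAX_ITER (0.02 / (60k, 80k) / 90k at batch 16 versus 0.01 / (120k, 160k) / 180k at batch 8). A small illustrative helper (hypothetical, not part of this repo) that reproduces the rescaling:

def rescale_schedule(base_lr, steps, max_iter, old_batch, new_batch):
    # linear scaling rule: LR scales with batch size, iteration counts inversely
    factor = new_batch / old_batch
    return (base_lr * factor,
            tuple(int(s / factor) for s in steps),
            int(max_iter / factor))

rescale_schedule(0.02, (60000, 80000), 90000, old_batch=16, new_batch=8)
# -> (0.01, (120000, 160000), 180000)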
71 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_order_test.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: False 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | COMPUTE_CC_RESULTS: False 45 | MASK_ON: True 46 | ORDER_ON: True 47 | DATASETS: 48 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 49 | TEST: ("panoptic_coco_2017_test_dev",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | BASE_LR: 0.02 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (60000, 80000) 56 | MAX_ITER: 90000 57 | TEST: 58 | CHUNK_PREDICTIONS: True 59 | PREDICTION_PATHS: [["/local/experiments/panoptic/order/03112019_222258/inference/panoptic_coco_2017_test_dev/file0.pth", "/local/experiments/panoptic/order/03112019_222258/inference/panoptic_coco_2017_test_dev/file1.pth", "/local/experiments/panoptic/order/03112019_222258/inference/panoptic_coco_2017_test_dev/file2.pth", "/local/experiments/panoptic/order/03112019_222258/inference/panoptic_coco_2017_test_dev/file3.pth"]] 60 | #PREDICTION_PATHS: [] #"/local/experiments/panoptic/03102019_151246/inference/panoptic_coco_2017_test_dev/predictions.pth"] 61 | TEMPORARY_DIR: "/local/tmp" 62 | NAME: "baseline" 63 | DESCRIPTION: "for testdev from panoptic fusion baseline" 64 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_order_val.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 
28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: False 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | COMPUTE_CC_RESULTS: False 45 | MASK_ON: True 46 | ORDER_ON: True 47 | DATASETS: 48 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 49 | TEST: ("panoptic_coco_2017_val_overlap02",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | BASE_LR: 0.02 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (60000, 80000) 56 | MAX_ITER: 90000 57 | TEST: 58 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 59 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 60 | PREDICTION_PATHS: [] #"/mnt/cube/jlazarow/experiments/panoptic/baseline/03072019_184209/inference/panoptic_coco_2017_val/predictions.pth"] 61 | FEED_GROUND_TRUTH_INSTANCES: True 62 | ORDER_ONLY: True 63 | TEMPORARY_DIR: "/scratch/tmp" 64 | NAME: "baseline" 65 | DESCRIPTION: "dense ontology" 66 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.py: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: True 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | USE_SEMANTIC_FEATURES: True 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | ROI_MASK_HEAD: 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | SEMANTIC: 36 | USE_FPN: True 37 | SEMANTIC_HEAD: "UpsampleConvSumCombineScales" 38 | CONV_HEAD_DIM: 128 39 | UPSAMPLE_MODULE: "One3x3ReLU" 40 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 41 | COLLAPSE_THING_ONTOLOGY: False 42 | UPSAMPLE_METHOD: "bilinear" 43 | PANOPTIC: 44 | INSTANCE_WEIGHT: 1.0 45 | SEMANTIC_WEIGHT: 0.5 46 | MASK_ON: True 47 | DATASETS: 48 | TRAIN: ("panoptic_coco_2017_train",) 49 | TEST: ("panoptic_coco_2017_val",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | IMS_PER_BATCH: 8 54 | BASE_LR: 0.01 55 | WEIGHT_DECAY: 0.0001 56 | STEPS: (120000, 160000) 57 | MAX_ITER: 180000 58 | TEST: 59 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 60 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 61 | PREDICTION_PATHS: [] 62 | TEMPORARY_DIR: "/media/data/jlazarow" 63 
| NAME: "prime_rpn_roi" 64 | DESCRIPTION: "priming the RPN and ROI heads with residual semantic features 4GPU" 65 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: True 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | USE_SEMANTIC_FEATURES: True 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | ROI_MASK_HEAD: 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | SEMANTIC: 36 | USE_FPN: True 37 | SEMANTIC_HEAD: "UpsampleConvSumCombineScales" 38 | CONV_HEAD_DIM: 128 39 | UPSAMPLE_MODULE: "One3x3ReLU" 40 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 41 | COLLAPSE_THING_ONTOLOGY: False 42 | UPSAMPLE_METHOD: "bilinear" 43 | PANOPTIC: 44 | INSTANCE_WEIGHT: 1.0 45 | SEMANTIC_WEIGHT: 0.5 46 | MASK_ON: True 47 | DATASETS: 48 | TRAIN: ("panoptic_coco_2017_train",) 49 | TEST: ("panoptic_coco_2017_tiny",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | BASE_LR: 0.02 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (60000, 80000) 56 | MAX_ITER: 90000 57 | TEST: 58 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 59 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 60 | PREDICTION_PATHS: [] #"/mnt/cube/jlazarow/experiments/panoptic/prime_rpn_roi/02192019_182845/inference/panoptic_coco_2017_val/predictions.pth"] 61 | TEMPORARY_DIR: "/scratch/tmp" 62 | NAME: "prime_rpn" 63 | DESCRIPTION: "priming the RPN with residual semantic features" 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .defaults import _C as cfg 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
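// Dispatch pattern shared by all of the csrc headers below: each
// Python-facing function checks the device of its input tensor, routes CUDA
// tensors to the corresponding *_cuda kernel when the extension was built
// with WITH_CUDA, and otherwise falls back to the CPU implementation (or
// raises AT_ERROR where no CPU kernel exists, e.g. the backward passes).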
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
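// Greedy non-maximum suppression on the CPU: boxes are visited in descending
// score order, and each surviving box suppresses every later box whose IoU
// with it is >= threshold; the indices of the kept boxes are returned. The
// "+ 1" in the area/intersection arithmetic follows the legacy integer pixel
// convention used throughout this codebase.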
2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 14 | const at::Tensor& rois, 15 | const float spatial_scale, 16 | const int pooled_height, 17 | const int pooled_width, 18 | const int batch_size, 19 | const int channels, 20 | const int height, 21 | const int width, 22 | const int sampling_ratio); 23 | 24 | 25 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(const at::Tensor& input, 26 | const at::Tensor& rois, 27 | const float spatial_scale, 28 | const int pooled_height, 29 | const int pooled_width); 30 | 31 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 32 | const at::Tensor& input, 33 | const at::Tensor& rois, 34 | const at::Tensor& argmax, 35 | const float spatial_scale, 36 | const int pooled_height, 37 | const int pooled_width, 38 | const int batch_size, 39 | const int channels, 40 | const int height, 41 | const int width); 42 | 43 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 44 | 45 | int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, 46 | at::Tensor offset, at::Tensor output, 47 | at::Tensor columns, at::Tensor ones, int kW, 48 | int kH, int dW, int dH, int padW, int padH, 49 | int dilationW, int dilationH, int group, 50 | int deformable_group, int im2col_step); 51 | 52 | int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, 53 | at::Tensor gradOutput, at::Tensor gradInput, 54 | at::Tensor gradOffset, at::Tensor weight, 55 | at::Tensor columns, int kW, int kH, int dW, 56 | int dH, int padW, int padH, int dilationW, 57 | int dilationH, int group, 58 | int deformable_group, int im2col_step); 59 | 60 | int deform_conv_backward_parameters_cuda( 61 | at::Tensor input, at::Tensor offset, at::Tensor gradOutput, 62 | at::Tensor gradWeight, // at::Tensor gradBias, 63 | at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, 64 | int padW, int padH, int dilationW, int dilationH, int group, 65 | int deformable_group, float scale, int im2col_step); 66 | 67 | void modulated_deform_conv_cuda_forward( 68 | at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, 69 | at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, 70 | int kernel_h, int kernel_w, const int stride_h, const int stride_w, 71 | const int pad_h, const int pad_w, const int dilation_h, 72 | const int dilation_w, const int group, const int deformable_group, 73 | const bool with_bias); 74 | 75 | void modulated_deform_conv_cuda_backward( 76 | at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, 77 | at::Tensor offset, at::Tensor mask, at::Tensor columns, 78 | at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, 79 | at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, 80 | int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, 81 | int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, 82 | const bool with_bias); 83 | 84 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 85 | const int height, 86 | const int width); 87 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/deform_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | int deform_conv_forward( 12 | at::Tensor input, 13 | at::Tensor weight, 14 | at::Tensor offset, 15 | at::Tensor output, 16 | at::Tensor columns, 17 | at::Tensor ones, 18 | int kW, 19 | int kH, 20 | int dW, 21 | int dH, 22 | int padW, 23 | int padH, 24 | int dilationW, 25 | int dilationH, 26 | int group, 27 | int deformable_group, 28 | int im2col_step) 29 | { 30 | if (input.type().is_cuda()) { 31 | #ifdef WITH_CUDA 32 | return deform_conv_forward_cuda( 33 | input, weight, offset, output, columns, ones, 34 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 35 | group, deformable_group, im2col_step 36 | ); 37 | #else 38 | AT_ERROR("Not compiled with GPU support"); 39 | #endif 40 | } 41 | AT_ERROR("Not implemented on the CPU"); 42 | } 43 | 44 | 45 | int deform_conv_backward_input( 46 | at::Tensor input, 47 | at::Tensor offset, 48 | at::Tensor gradOutput, 49 | at::Tensor gradInput, 50 | at::Tensor gradOffset, 51 | at::Tensor weight, 52 | at::Tensor columns, 53 | int kW, 54 | int kH, 55 | int dW, 56 | int dH, 57 | int padW, 58 | int padH, 59 | int dilationW, 60 | int dilationH, 61 | int group, 62 | int deformable_group, 63 | int im2col_step) 64 | { 65 | if (input.type().is_cuda()) { 66 | #ifdef WITH_CUDA 67 | return deform_conv_backward_input_cuda( 68 | input, offset, gradOutput, gradInput, gradOffset, weight, columns, 69 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 70 | group, deformable_group, im2col_step 71 | ); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | AT_ERROR("Not implemented on the CPU"); 77 | } 78 | 79 | 80 | int deform_conv_backward_parameters( 81 | at::Tensor input, 82 | at::Tensor offset, 83 | at::Tensor gradOutput, 84 | at::Tensor gradWeight, // at::Tensor gradBias, 85 | at::Tensor columns, 86 | at::Tensor ones, 87 | int kW, 88 | int kH, 89 | int dW, 90 | int dH, 91 | int padW, 92 | int padH, 93 | int dilationW, 94 | int dilationH, 95 | int group, 96 | int deformable_group, 97 | float scale, 98 | int im2col_step) 99 | { 100 | if (input.type().is_cuda()) { 101 | #ifdef WITH_CUDA 102 | return deform_conv_backward_parameters_cuda( 103 | input, offset, gradOutput, gradWeight, columns, ones, 104 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 105 | group, deformable_group, scale, im2col_step 106 | ); 107 | #else 108 | AT_ERROR("Not compiled with GPU support"); 109 | #endif 110 | } 111 | AT_ERROR("Not implemented on the CPU"); 112 | } 113 | 114 | 115 | void modulated_deform_conv_forward( 116 | at::Tensor input, 117 | at::Tensor weight, 118 | at::Tensor bias, 119 | at::Tensor ones, 120 | at::Tensor offset, 121 | at::Tensor mask, 122 | at::Tensor output, 123 | at::Tensor columns, 124 | int kernel_h, 125 | int kernel_w, 126 | const int stride_h, 127 | const int stride_w, 128 | const int pad_h, 129 | const int pad_w, 130 | const int dilation_h, 131 | const int dilation_w, 132 | const int group, 133 | const int deformable_group, 134 | const bool with_bias) 135 | { 136 | if (input.type().is_cuda()) { 137 | #ifdef WITH_CUDA 138 | return modulated_deform_conv_cuda_forward( 139 | input, weight, bias, ones, offset, mask, output, columns, 140 | kernel_h, kernel_w, stride_h, stride_w, 141 | pad_h, pad_w, dilation_h, dilation_w, 142 | group, deformable_group, with_bias 143 | ); 144 | #else 145 | AT_ERROR("Not compiled with GPU support"); 146 | #endif 147 | } 148 | AT_ERROR("Not 
implemented on the CPU"); 149 | } 150 | 151 | 152 | void modulated_deform_conv_backward( 153 | at::Tensor input, 154 | at::Tensor weight, 155 | at::Tensor bias, 156 | at::Tensor ones, 157 | at::Tensor offset, 158 | at::Tensor mask, 159 | at::Tensor columns, 160 | at::Tensor grad_input, 161 | at::Tensor grad_weight, 162 | at::Tensor grad_bias, 163 | at::Tensor grad_offset, 164 | at::Tensor grad_mask, 165 | at::Tensor grad_output, 166 | int kernel_h, 167 | int kernel_w, 168 | int stride_h, 169 | int stride_w, 170 | int pad_h, 171 | int pad_w, 172 | int dilation_h, 173 | int dilation_w, 174 | int group, 175 | int deformable_group, 176 | const bool with_bias) 177 | { 178 | if (input.type().is_cuda()) { 179 | #ifdef WITH_CUDA 180 | return modulated_deform_conv_cuda_backward( 181 | input, weight, bias, ones, offset, mask, columns, 182 | grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output, 183 | kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, 184 | group, deformable_group, with_bias 185 | ); 186 | #else 187 | AT_ERROR("Not compiled with GPU support"); 188 | #endif 189 | } 190 | AT_ERROR("Not implemented on the CPU"); 191 | } -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
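// This translation unit builds the compiled extension: pybind11 registers
// the NMS, ROIAlign, ROIPool, and deformable-convolution entry points under
// TORCH_EXTENSION_NAME, which the Python wrappers in maskrcnn_benchmark/layers
// import as the _C module.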
2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "deform_conv.h" 6 | 7 | 8 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 9 | m.def("nms", &nms, "non-maximum suppression"); 10 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 11 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 12 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 13 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 14 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 15 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 16 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 17 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 18 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 19 | } 20 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_data_loader 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.structures.image_list import to_image_list 3 | 4 | 5 | class BatchCollator(object): 6 | """ 7 | From a list of samples from the dataset, 8 | returns the batched images and targets. 9 | This should be passed to the DataLoader 10 | """ 11 | 12 | def __init__(self, size_divisible=0): 13 | self.size_divisible = size_divisible 14 | 15 | def __call__(self, batch): 16 | transposed_batch = list(zip(*batch)) 17 | images = to_image_list(transposed_batch[0], self.size_divisible) 18 | targets = transposed_batch[1] 19 | img_ids = transposed_batch[2] 20 | return images, targets, img_ids 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .coco import COCODataset, COCOTestDataset 3 | from .panoptic_cityscapes import PanopticCityscapesDataset 4 | from .panoptic_coco import PanopticCOCODataset 5 | from .voc import PascalVOCDataset 6 | from .concat_dataset import ConcatDataset 7 | 8 | __all__ = ["COCODataset", "PanopticCityscapesDataset", "PanopticCOCODataset", "ConcatDataset", "PascalVOCDataset", "COCOTestDataset"] 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the sizes of the image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.data import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .voc import voc_evaluation 5 | 6 | 7 | def evaluate(dataset, predictions, output_folder, **kwargs): 8 | """evaluate dataset using different methods based on dataset type. 9 | Args: 10 | dataset: Dataset object 11 | predictions(list[BoxList]): each item in the list represents the 12 | prediction results for one image. 13 | output_folder: output folder, to save evaluation files or results. 14 | **kwargs: other args. 15 | Returns: 16 | evaluation result 17 | """ 18 | args = dict( 19 | dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs 20 | ) 21 | 22 | # todo, change this at some point. 23 | if isinstance(dataset, (datasets.COCODataset, datasets.PanopticCOCODataset, datasets.COCOTestDataset, datasets.PanopticCityscapesDataset)): 24 | return coco_evaluation(**args) 25 | elif isinstance(dataset, datasets.PascalVOCDataset): 26 | return voc_evaluation(**args) 27 | else: 28 | dataset_name = dataset.__class__.__name__ 29 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 30 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation 2 | 3 | 4 | def coco_evaluation( 5 | dataset, 6 | predictions, 7 | output_folder, 8 | box_only, 9 | iou_types, 10 | expected_results, 11 | expected_results_sigma_tol, 12 | working_directory, 13 | save_panoptic_results, 14 | save_pre_results, 15 | panoptic_confidence_thresh, 16 | panoptic_overlap_thresh, 17 | panoptic_stuff_min_area): 18 | return do_coco_evaluation( 19 | dataset=dataset, 20 | predictions=predictions, 21 | box_only=box_only, 22 | output_folder=output_folder, 23 | iou_types=iou_types, 24 | expected_results=expected_results, 25 | expected_results_sigma_tol=expected_results_sigma_tol, 26 | working_directory=working_directory, 27 | save_panoptic_results=save_panoptic_results, 28 | save_pre_results=save_pre_results, 29 | panoptic_confidence_thresh=panoptic_confidence_thresh, 30 | panoptic_overlap_thresh=panoptic_overlap_thresh, 31 | panoptic_stuff_min_area=panoptic_stuff_min_area) 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .voc_eval import do_voc_evaluation 4 | 5 | 6 | def 
voc_evaluation(dataset, predictions, output_folder, box_only, **_): 7 | logger = logging.getLogger("maskrcnn_benchmark.inference") 8 | if box_only: 9 | logger.warning("voc evaluation doesn't support box_only, ignored.") 10 | logger.info("performing voc evaluation, ignored iou_types.") 11 | return do_voc_evaluation( 12 | dataset=dataset, 13 | predictions=predictions, 14 | output_folder=output_folder, 15 | logger=logger, 16 | ) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/list_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Simple dataset class that wraps a list of path names 4 | """ 5 | 6 | from PIL import Image 7 | 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList 9 | 10 | 11 | class ListDataset(object): 12 | def __init__(self, image_lists, transforms=None): 13 | self.image_lists = image_lists 14 | self.transforms = transforms 15 | 16 | def __getitem__(self, item): 17 | img = Image.open(self.image_lists[item]).convert("RGB") 18 | 19 | # dummy target 20 | w, h = img.size 21 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 22 | 23 | if self.transforms is not None: 24 | img, target = self.transforms(img, target) 25 | 26 | return img, target 27 | 28 | def __len__(self): 29 | return len(self.image_lists) 30 | 31 | def get_img_info(self, item): 32 | """ 33 | Return the image dimensions for the image, without 34 | loading and pre-processing it 35 | """ 36 | pass 37 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.utils.data 5 | from PIL import Image 6 | import sys 7 | 8 | if sys.version_info[0] == 2: 9 | import xml.etree.cElementTree as ET 10 | else: 11 | import xml.etree.ElementTree as ET 12 | 13 | 14 | from maskrcnn_benchmark.structures.bounding_box import BoxList 15 | 16 | 17 | class PascalVOCDataset(torch.utils.data.Dataset): 18 | 19 | CLASSES = ( 20 | "__background__ ", 21 | "aeroplane", 22 | "bicycle", 23 | "bird", 24 | "boat", 25 | "bottle", 26 | "bus", 27 | "car", 28 | "cat", 29 | "chair", 30 | "cow", 31 | "diningtable", 32 | "dog", 33 | "horse", 34 | "motorbike", 35 | "person", 36 | "pottedplant", 37 | "sheep", 38 | "sofa", 39 | "train", 40 | "tvmonitor", 41 | ) 42 | 43 | def __init__(self, data_dir, split, use_difficult=False, transforms=None): 44 | self.root = data_dir 45 | self.image_set = split 46 | self.keep_difficult = use_difficult 47 | self.transforms = transforms 48 | 49 | self._annopath = os.path.join(self.root, "Annotations", "%s.xml") 50 | self._imgpath = os.path.join(self.root, "JPEGImages", "%s.jpg") 51 | self._imgsetpath = os.path.join(self.root, "ImageSets", "Main", "%s.txt") 52 | 53 | with open(self._imgsetpath % self.image_set) as f: 54 | self.ids = f.readlines() 55 | self.ids = [x.strip("\n") for x in self.ids] 56 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 57 | 58 | cls = PascalVOCDataset.CLASSES 59 | self.class_to_ind = dict(zip(cls, range(len(cls)))) 60 | 61 | def __getitem__(self, index): 62 | img_id = self.ids[index] 63 | img = Image.open(self._imgpath % img_id).convert("RGB") 64 | 65 | target = self.get_groundtruth(index) 66 | target = target.clip_to_image(remove_empty=True) 67 | 68 | if self.transforms is not 
None: 69 | img, target = self.transforms(img, target) 70 | 71 | return img, target, index 72 | 73 | def __len__(self): 74 | return len(self.ids) 75 | 76 | def get_groundtruth(self, index): 77 | img_id = self.ids[index] 78 | anno = ET.parse(self._annopath % img_id).getroot() 79 | anno = self._preprocess_annotation(anno) 80 | 81 | height, width = anno["im_info"] 82 | target = BoxList(anno["boxes"], (width, height), mode="xyxy") 83 | target.add_field("labels", anno["labels"]) 84 | target.add_field("difficult", anno["difficult"]) 85 | return target 86 | 87 | def _preprocess_annotation(self, target): 88 | boxes = [] 89 | gt_classes = [] 90 | difficult_boxes = [] 91 | TO_REMOVE = 1 92 | 93 | for obj in target.iter("object"): 94 | difficult = int(obj.find("difficult").text) == 1 95 | if not self.keep_difficult and difficult: 96 | continue 97 | name = obj.find("name").text.lower().strip() 98 | bb = obj.find("bndbox") 99 | # Make pixel indexes 0-based 100 | # Refer to "https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py#L208-L211" 101 | box = [ 102 | bb.find("xmin").text, 103 | bb.find("ymin").text, 104 | bb.find("xmax").text, 105 | bb.find("ymax").text, 106 | ] 107 | bndbox = tuple( 108 | map(lambda x: x - TO_REMOVE, list(map(int, box))) 109 | ) 110 | 111 | boxes.append(bndbox) 112 | gt_classes.append(self.class_to_ind[name]) 113 | difficult_boxes.append(difficult) 114 | 115 | size = target.find("size") 116 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 117 | 118 | res = { 119 | "boxes": torch.tensor(boxes, dtype=torch.float32), 120 | "labels": torch.tensor(gt_classes), 121 | "difficult": torch.tensor(difficult_boxes), 122 | "im_info": im_info, 123 | } 124 | return res 125 | 126 | def get_img_info(self, index): 127 | img_id = self.ids[index] 128 | anno = ET.parse(self._annopath % img_id).getroot() 129 | size = anno.find("size") 130 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 131 | return {"height": im_info[0], "width": im_info[1]} 132 | 133 | def map_class_id_to_class_name(self, class_id): 134 | return PascalVOCDataset.CLASSES[class_id] 135 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | It is especially useful in conjunction with 13 | :class:`torch.nn.parallel.DistributedDataParallel`. 
In such case, each 14 | process can pass a DistributedSampler instance as a DataLoader sampler, 15 | and load a subset of the original dataset that is exclusive to it. 16 | .. note:: 17 | Dataset is assumed to be of constant size. 18 | Arguments: 19 | dataset: Dataset used for sampling. 20 | num_replicas (optional): Number of processes participating in 21 | distributed training. 22 | rank (optional): Rank of the current process within num_replicas. 23 | """ 24 | 25 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 26 | if num_replicas is None: 27 | if not dist.is_available(): 28 | raise RuntimeError("Requires distributed package to be available") 29 | num_replicas = dist.get_world_size() 30 | if rank is None: 31 | if not dist.is_available(): 32 | raise RuntimeError("Requires distributed package to be available") 33 | rank = dist.get_rank() 34 | self.dataset = dataset 35 | self.num_replicas = num_replicas 36 | self.rank = rank 37 | self.epoch = 0 38 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 39 | self.total_size = self.num_samples * self.num_replicas 40 | self.shuffle = shuffle  # use the flag passed by the caller 41 | 42 | def __iter__(self): 43 | if self.shuffle: 44 | # deterministically shuffle based on epoch 45 | g = torch.Generator() 46 | g.manual_seed(self.epoch) 47 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 48 | else: 49 | indices = torch.arange(len(self.dataset)).tolist() 50 | 51 | # add extra samples to make it evenly divisible 52 | indices += indices[: (self.total_size - len(indices))] 53 | assert len(indices) == self.total_size 54 | 55 | # subsample 56 | offset = self.num_samples * self.rank 57 | indices = indices[offset : offset + self.num_samples] 58 | assert len(indices) == self.num_samples 59 | 60 | return iter(indices) 61 | 62 | def __len__(self): 63 | return self.num_samples 64 | 65 | def set_epoch(self, epoch): 66 | self.epoch = epoch 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import itertools 3 | 4 | import torch 5 | from torch.utils.data.sampler import BatchSampler 6 | from torch.utils.data.sampler import Sampler 7 | 8 | 9 | class GroupedBatchSampler(BatchSampler): 10 | """ 11 | Wraps another sampler to yield a mini-batch of indices. 12 | It enforces that elements from the same group should appear in groups of batch_size. 13 | It also tries to provide mini-batches which follow an ordering which is 14 | as close as possible to the ordering from the original sampler. 15 | 16 | Arguments: 17 | sampler (Sampler): Base sampler. 18 | batch_size (int): Size of mini-batch.
19 | drop_uneven (bool): If ``True``, the sampler will drop the batches whose 20 | size is less than ``batch_size`` 21 | 22 | """ 23 | 24 | def __init__(self, sampler, group_ids, batch_size, drop_uneven=False): 25 | if not isinstance(sampler, Sampler): 26 | raise ValueError( 27 | "sampler should be an instance of " 28 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 29 | ) 30 | self.sampler = sampler 31 | self.group_ids = torch.as_tensor(group_ids) 32 | assert self.group_ids.dim() == 1 33 | self.batch_size = batch_size 34 | self.drop_uneven = drop_uneven 35 | 36 | self.groups = torch.unique(self.group_ids).sort(0)[0] 37 | 38 | self._can_reuse_batches = False 39 | 40 | def _prepare_batches(self): 41 | dataset_size = len(self.group_ids) 42 | # get the sampled indices from the sampler 43 | sampled_ids = torch.as_tensor(list(self.sampler)) 44 | # potentially not all elements of the dataset were sampled 45 | # by the sampler (e.g., DistributedSampler). 46 | # construct a tensor which contains -1 if the element was 47 | # not sampled, and a non-negative number indicating the 48 | # order where the element was sampled. 49 | # for example. if sampled_ids = [3, 1] and dataset_size = 5, 50 | # the order is [-1, 1, -1, 0, -1] 51 | order = torch.full((dataset_size,), -1, dtype=torch.int64) 52 | order[sampled_ids] = torch.arange(len(sampled_ids)) 53 | 54 | # get a mask with the elements that were sampled 55 | mask = order >= 0 56 | 57 | # find the elements that belong to each individual cluster 58 | clusters = [(self.group_ids == i) & mask for i in self.groups] 59 | # get relative order of the elements inside each cluster 60 | # that follows the order from the sampler 61 | relative_order = [order[cluster] for cluster in clusters] 62 | # with the relative order, find the absolute order in the 63 | # sampled space 64 | permutation_ids = [s[s.sort()[1]] for s in relative_order] 65 | # permute each cluster so that they follow the order from 66 | # the sampler 67 | permuted_clusters = [sampled_ids[idx] for idx in permutation_ids] 68 | 69 | # splits each cluster in batch_size, and merge as a list of tensors 70 | splits = [c.split(self.batch_size) for c in permuted_clusters] 71 | merged = tuple(itertools.chain.from_iterable(splits)) 72 | 73 | # now each batch internally has the right order, but 74 | # they are grouped by clusters. Find the permutation between 75 | # different batches that brings them as close as possible to 76 | # the order that we have in the sampler. 
For that, we will consider the 77 | # ordering as coming from the first element of each batch, and sort 78 | # correspondingly 79 | first_element_of_batch = [t[0].item() for t in merged] 80 | # get and inverse mapping from sampled indices and the position where 81 | # they occur (as returned by the sampler) 82 | inv_sampled_ids_map = {v: k for k, v in enumerate(sampled_ids.tolist())} 83 | # from the first element in each batch, get a relative ordering 84 | first_index_of_batch = torch.as_tensor( 85 | [inv_sampled_ids_map[s] for s in first_element_of_batch] 86 | ) 87 | 88 | # permute the batches so that they approximately follow the order 89 | # from the sampler 90 | permutation_order = first_index_of_batch.sort(0)[1].tolist() 91 | # finally, permute the batches 92 | batches = [merged[i].tolist() for i in permutation_order] 93 | 94 | if self.drop_uneven: 95 | kept = [] 96 | for batch in batches: 97 | if len(batch) == self.batch_size: 98 | kept.append(batch) 99 | batches = kept 100 | return batches 101 | 102 | def __iter__(self): 103 | if self._can_reuse_batches: 104 | batches = self._batches 105 | self._can_reuse_batches = False 106 | else: 107 | batches = self._prepare_batches() 108 | self._batches = batches 109 | return iter(batches) 110 | 111 | def __len__(self): 112 | if not hasattr(self, "_batches"): 113 | self._batches = self._prepare_batches() 114 | self._can_reuse_batches = True 115 | return len(self._batches) 116 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | flip_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 10 | else: 11 | min_size = cfg.INPUT.MIN_SIZE_TEST 12 | max_size = cfg.INPUT.MAX_SIZE_TEST 13 | flip_prob = 0 14 | 15 | to_bgr255 = cfg.INPUT.TO_BGR255 16 | normalize_transform = T.Normalize( 17 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 18 | ) 19 | 20 | transform = T.Compose( 21 | [ 22 | T.Resize(min_size, max_size), 23 | T.RandomHorizontalFlip(flip_prob), 24 | T.ToTensor(), 25 | normalize_transform, 26 | ] 27 | ) 28 | return transform 29 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import random 3 | 4 | import torch 5 | import torchvision 6 | from torchvision.transforms import functional as F 7 | 8 | 9 | class Compose(object): 10 | def __init__(self, transforms): 11 | self.transforms = transforms 12 | 13 | def __call__(self, image, target): 14 | for t in self.transforms: 15 | image, target = t(image, target) 16 | return image, target 17 | 18 | def __repr__(self): 19 | format_string = self.__class__.__name__ + "(" 20 | for t in self.transforms: 21 | format_string += "\n" 22 | format_string += " {0}".format(t) 23 | format_string += "\n)" 24 | return format_string 25 | 26 | """ 27 | class Resize(object): 28 | def __init__(self, min_size, max_size): 29 | self.min_size = min_size 30 | self.max_size = max_size 31 | 32 | # modified from torchvision to add support for max size 33 | def get_size(self, image_size): 34 | w, h = image_size 35 | size = self.min_size 36 | max_size = self.max_size 37 | if max_size is not None: 38 | min_original_size = float(min((w, h))) 39 | max_original_size = float(max((w, h))) 40 | if max_original_size / min_original_size * size > max_size: 41 | size = int(round(max_size * min_original_size / max_original_size)) 42 | 43 | if (w <= h and w == size) or (h <= w and h == size): 44 | return (h, w) 45 | 46 | if w < h: 47 | ow = size 48 | oh = int(size * h / w) 49 | else: 50 | oh = size 51 | ow = int(size * w / h) 52 | 53 | return (oh, ow) 54 | """ 55 | class Resize(object): 56 | def __init__(self, min_size, max_size): 57 | if not isinstance(min_size, (list, tuple)): 58 | min_size = (min_size,) 59 | self.min_size = min_size 60 | self.max_size = max_size 61 | 62 | # modified from torchvision to add support for max size 63 | def get_size(self, image_size): 64 | w, h = image_size 65 | size = random.choice(self.min_size) 66 | max_size = self.max_size 67 | """ 68 | if max_size is not None: 69 | min_original_size = float(min((w, h))) 70 | max_original_size = float(max((w, h))) 71 | if max_original_size / min_original_size * size > max_size: 72 | size = int(round(max_size * min_original_size / max_original_size)) 73 | """ 74 | if (w <= h and w == size) or (h <= w and h == size): 75 | return (h, w) 76 | 77 | if w < h: 78 | ow = size 79 | oh = int(size * h / w) 80 | else: 81 | oh = size 82 | ow = int(size * w / h) 83 | 84 | return (oh, ow) 85 | 86 | def __call__(self, image, target): 87 | size = self.get_size(image.size) 88 | image = F.resize(image, size) 89 | target = target.resize(image.size) 90 | return image, target 91 | 92 | class 
RandomHorizontalFlip(object): 93 | def __init__(self, prob=0.5): 94 | self.prob = prob 95 | 96 | def __call__(self, image, target): 97 | if random.random() < self.prob: 98 | image = F.hflip(image) 99 | target = target.transpose(0) 100 | return image, target 101 | 102 | 103 | class ToTensor(object): 104 | def __call__(self, image, target): 105 | return F.to_tensor(image), target 106 | 107 | 108 | class Normalize(object): 109 | def __init__(self, mean, std, to_bgr255=True): 110 | self.mean = mean 111 | self.std = std 112 | self.to_bgr255 = to_bgr255 113 | 114 | def __call__(self, image, target): 115 | if self.to_bgr255: 116 | image = image[[2, 1, 0]] * 255 117 | image = F.normalize(image, mean=self.mean, std=self.std) 118 | return image, target 119 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d 5 | from .misc import Conv2d 6 | from .misc import ConvTranspose2d 7 | from .misc import interpolate 8 | from .nms import nms 9 | from .roi_align import ROIAlign 10 | from .roi_align import roi_align 11 | from .roi_pool import ROIPool 12 | from .roi_pool import roi_pool 13 | from .smooth_l1_loss import smooth_l1_loss 14 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 15 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, ModulatedDeformConvPack 16 | 17 | __all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", 18 | "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate", 19 | "FrozenBatchNorm2d"] 20 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
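# note: this module JIT-compiles the C++/CUDA sources under csrc/ at import
# time via torch's cpp_extension loader; the rest of the codebase imports the
# prebuilt maskrcnn_benchmark._C instead (see layers/nms.py), so this appears
# to be a development-time fallback.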
2 | import glob 3 | import os.path 4 | 5 | import torch 6 | 7 | try: 8 | from torch.utils.cpp_extension import load as load_ext 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | except ImportError: 11 | raise ImportError("The cpp layer extensions require PyTorch 0.4 or higher") 12 | 13 | 14 | def _load_C_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | this_dir = os.path.dirname(this_dir) 17 | this_dir = os.path.join(this_dir, "csrc") 18 | 19 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 22 | 23 | source = main_file + source_cpu 24 | 25 | extra_cflags = [] 26 | if torch.cuda.is_available() and CUDA_HOME is not None: 27 | source.extend(source_cuda) 28 | extra_cflags = ["-DWITH_CUDA"] 29 | source = [os.path.join(this_dir, s) for s in source] 30 | extra_include_paths = [this_dir] 31 | return load_ext( 32 | "torchvision", 33 | source, 34 | extra_cflags=extra_cflags, 35 | extra_include_paths=extra_include_paths, 36 | ) 37 | 38 | 39 | _C = _load_C_extensions() 40 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/batch_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class FrozenBatchNorm2d(nn.Module): 7 | """ 8 | BatchNorm2d where the batch statistics and the affine parameters 9 | are fixed 10 | """ 11 | 12 | def __init__(self, n): 13 | super(FrozenBatchNorm2d, self).__init__() 14 | self.register_buffer("weight", torch.ones(n)) 15 | self.register_buffer("bias", torch.zeros(n)) 16 | self.register_buffer("running_mean", torch.zeros(n)) 17 | self.register_buffer("running_var", torch.ones(n)) 18 | 19 | def forward(self, x): 20 | scale = self.weight * self.running_var.rsqrt() 21 | bias = self.bias - self.running_mean * scale 22 | scale = scale.reshape(1, -1, 1, 1) 23 | bias = bias.reshape(1, -1, 1, 1) 24 | return x * scale + bias 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn) 3 | # 4 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from maskrcnn_benchmark import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppression""" 8 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
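# autograd wrapper around the custom ROIAlign operator: forward dispatches to
# the compiled _C.roi_align_forward, and backward only produces a gradient for
# the input feature map (the rois and the pooling parameters receive None).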
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIAlign(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None 45 | 46 | 47 | roi_align = _ROIAlign.apply 48 | 49 | 50 | class ROIAlign(nn.Module): 51 | def __init__(self, output_size, spatial_scale, sampling_ratio): 52 | super(ROIAlign, self).__init__() 53 | self.output_size = output_size 54 | self.spatial_scale = spatial_scale 55 | self.sampling_ratio = sampling_ratio 56 | 57 | def forward(self, input, rois): 58 | return roi_align( 59 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 60 | ) 61 | 62 | def __repr__(self): 63 | tmpstr = self.__class__.__name__ + "(" 64 | tmpstr += "output_size=" + str(self.output_size) 65 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 66 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 67 | tmpstr += ")" 68 | return tmpstr 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
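# same pattern as roi_align.py: _ROIPool saves the argmax locations from the
# forward pass so that _C.roi_pool_backward can route gradients back to the
# positions that were selected by the max pooling.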
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # TODO maybe push this to nn? 6 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 7 | """ 8 | very similar to the smooth_l1_loss from pytorch, but with 9 | the extra beta parameter 10 | """ 11 | n = torch.abs(input - target) 12 | cond = n < beta 13 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 14 | if size_average: 15 | return loss.mean() 16 | return loss.sum() 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/modeling/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .backbone import build_backbone 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
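# backbones are looked up in the BACKBONES registry by the string in
# cfg.MODEL.BACKBONE.CONV_BODY (e.g. "R-50-FPN"), so adding a new backbone only
# requires registering another build function here.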
2 | from collections import OrderedDict 3 | 4 | from torch import nn 5 | 6 | from maskrcnn_benchmark.modeling import registry 7 | from maskrcnn_benchmark.modeling.make_layers import conv_with_kaiming_uniform 8 | from . import fpn as fpn_module 9 | from . import resnet 10 | 11 | 12 | @registry.BACKBONES.register("R-50-C4") 13 | @registry.BACKBONES.register("R-50-C5") 14 | @registry.BACKBONES.register("R-101-C4") 15 | @registry.BACKBONES.register("R-101-C5") 16 | def build_resnet_backbone(cfg): 17 | body = resnet.ResNet(cfg) 18 | model = nn.Sequential(OrderedDict([("body", body)])) 19 | return model 20 | 21 | 22 | @registry.BACKBONES.register("R-50-FPN") 23 | @registry.BACKBONES.register("R-101-FPN") 24 | def build_resnet_fpn_backbone(cfg): 25 | body = resnet.ResNet(cfg) 26 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 27 | out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS 28 | fpn = fpn_module.FPN( 29 | in_channels_list=[ 30 | in_channels_stage2, 31 | in_channels_stage2 * 2, 32 | in_channels_stage2 * 4, 33 | in_channels_stage2 * 8, 34 | ], 35 | out_channels=out_channels, 36 | conv_block=conv_with_kaiming_uniform( 37 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 38 | ), 39 | top_blocks=fpn_module.LastLevelMaxPool(), 40 | ) 41 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 42 | return model 43 | 44 | 45 | def build_backbone(cfg): 46 | assert cfg.MODEL.BACKBONE.CONV_BODY in registry.BACKBONES, \ 47 | "cfg.MODEL.BACKBONE.CONV_BODY: {} is not registered in the registry".format( 48 | cfg.MODEL.BACKBONE.CONV_BODY 49 | ) 50 | return registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY](cfg) 51 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | 7 | class FPN(nn.Module): 8 | """ 9 | Module that adds FPN on top of a list of feature maps. 10 | The feature maps are currently supposed to be in increasing depth 11 | order, and must be consecutive 12 | """ 13 | 14 | def __init__( 15 | self, in_channels_list, out_channels, conv_block, top_blocks=None 16 | ): 17 | """ 18 | Arguments: 19 | in_channels_list (list[int]): number of channels for each feature map that 20 | will be fed 21 | out_channels (int): number of channels of the FPN representation 22 | top_blocks (nn.Module or None): if provided, an extra operation will 23 | be performed on the output of the last (smallest resolution) 24 | FPN output, and the result will extend the result list 25 | """ 26 | super(FPN, self).__init__() 27 | self.inner_blocks = [] 28 | self.layer_blocks = [] 29 | for idx, in_channels in enumerate(in_channels_list, 1): 30 | inner_block = "fpn_inner{}".format(idx) 31 | layer_block = "fpn_layer{}".format(idx) 32 | inner_block_module = conv_block(in_channels, out_channels, 1) 33 | layer_block_module = conv_block(out_channels, out_channels, 3, 1) 34 | self.add_module(inner_block, inner_block_module) 35 | self.add_module(layer_block, layer_block_module) 36 | self.inner_blocks.append(inner_block) 37 | self.layer_blocks.append(layer_block) 38 | self.top_blocks = top_blocks 39 | 40 | def forward(self, x): 41 | """ 42 | Arguments: 43 | x (list[Tensor]): feature maps for each feature level. 44 | Returns: 45 | results (tuple[Tensor]): feature maps after FPN layers.
46 | They are ordered from highest resolution first. 47 | """ 48 | 49 | # the deepest layer gets the 1x1 -> 3x3 treatment. 50 | last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) 51 | results = [] 52 | results.append(getattr(self, self.layer_blocks[-1])(last_inner)) 53 | 54 | # ignore the last and reverse order (deepest to shallowest). 55 | for feature, inner_block, layer_block in zip( 56 | x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1] 57 | ): 58 | # nearest-neighbor upsampling of the top-down path. 59 | inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") 60 | 61 | # 1x1 treatment. 62 | inner_lateral = getattr(self, inner_block)(feature) 63 | # TODO use size instead of scale to make it robust to different sizes 64 | # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:], 65 | # mode='bilinear', align_corners=False) 66 | 67 | # add to resized top down. 68 | last_inner = inner_lateral + inner_top_down 69 | 70 | # 3x3 treatment. 71 | results.insert(0, getattr(self, layer_block)(last_inner)) 72 | 73 | if self.top_blocks is not None: 74 | # this should reference the 1x1 -> 3x3 treatment of the deepest block. 75 | # looks like this is usually just a max pool. 76 | last_results = self.top_blocks(results[-1]) 77 | results.extend(last_results) 78 | 79 | return tuple(results) 80 | 81 | 82 | class LastLevelMaxPool(nn.Module): 83 | def forward(self, x): 84 | return [F.max_pool2d(x, 1, 2, 0)] 85 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class BalancedPositiveNegativeSampler(object): 6 | """ 7 | This class samples batches, ensuring that they contain a fixed proportion of positives 8 | """ 9 | 10 | def __init__(self, batch_size_per_image, positive_fraction): 11 | """ 12 | Arguments: 13 | batch_size_per_image (int): number of elements to be selected per image 14 | positive_fraction (float): percentage of positive elements per batch 15 | """ 16 | self.batch_size_per_image = batch_size_per_image 17 | self.positive_fraction = positive_fraction 18 | 19 | def __call__(self, matched_idxs): 20 | """ 21 | Arguments: 22 | matched_idxs: list of tensors containing -1, 0 or positive values. 23 | Each tensor corresponds to a specific image. 24 | -1 values are ignored, 0 are considered as negatives and > 0 as 25 | positives. 26 | 27 | Returns: 28 | pos_idx (list[tensor]) 29 | neg_idx (list[tensor]) 30 | 31 | Returns two lists of binary masks for each image. 32 | The first list contains the positive elements that were selected, 33 | and the second list the negative examples.
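        For example (illustrative numbers, not taken from a config in this
        repository): with batch_size_per_image=512 and positive_fraction=0.25,
        at most int(512 * 0.25) = 128 positives are kept per image and the
        remaining slots are filled with negatives, fewer if not enough of
        either exist.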
34 | """ 35 | pos_idx = [] 36 | neg_idx = [] 37 | for matched_idxs_per_image in matched_idxs: 38 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 39 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 40 | 41 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = self.batch_size_per_image - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx_per_image = positive[perm1] 53 | neg_idx_per_image = negative[perm2] 54 | 55 | # create binary mask from indices 56 | pos_idx_per_image_mask = torch.zeros_like( 57 | matched_idxs_per_image, dtype=torch.uint8 58 | ) 59 | neg_idx_per_image_mask = torch.zeros_like( 60 | matched_idxs_per_image, dtype=torch.uint8 61 | ) 62 | pos_idx_per_image_mask[pos_idx_per_image] = 1 63 | neg_idx_per_image_mask[neg_idx_per_image] = 1 64 | 65 | pos_idx.append(pos_idx_per_image_mask) 66 | neg_idx.append(neg_idx_per_image_mask) 67 | 68 | return pos_idx, neg_idx 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class BoxCoder(object): 8 | """ 9 | This class encodes and decodes a set of bounding boxes into 10 | the representation used for training the regressors. 11 | """ 12 | 13 | def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)): 14 | """ 15 | Arguments: 16 | weights (4-element tuple) 17 | bbox_xform_clip (float) 18 | """ 19 | self.weights = weights 20 | self.bbox_xform_clip = bbox_xform_clip 21 | 22 | def encode(self, reference_boxes, proposals): 23 | """ 24 | Encode a set of proposals with respect to some 25 | reference boxes 26 | 27 | Arguments: 28 | reference_boxes (Tensor): reference boxes 29 | proposals (Tensor): boxes to be encoded 30 | """ 31 | 32 | TO_REMOVE = 1 # TODO remove 33 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 34 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 35 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 36 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 37 | 38 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 39 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 40 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 41 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 42 | 43 | wx, wy, ww, wh = self.weights 44 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 45 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 46 | targets_dw = ww * torch.log(gt_widths / ex_widths) 47 | targets_dh = wh * torch.log(gt_heights / ex_heights) 48 | 49 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 50 | return targets 51 | 52 | def decode(self, rel_codes, boxes): 53 | """ 54 | From a set of original boxes and encoded relative box offsets, 55 | get the decoded boxes. 56 | 57 | Arguments: 58 | rel_codes (Tensor): encoded boxes 59 | boxes (Tensor): reference boxes. 
60 | """ 61 | 62 | boxes = boxes.to(rel_codes.dtype) 63 | 64 | TO_REMOVE = 1 # TODO remove 65 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 66 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 67 | ctr_x = boxes[:, 0] + 0.5 * widths 68 | ctr_y = boxes[:, 1] + 0.5 * heights 69 | 70 | wx, wy, ww, wh = self.weights 71 | dx = rel_codes[:, 0::4] / wx 72 | dy = rel_codes[:, 1::4] / wy 73 | dw = rel_codes[:, 2::4] / ww 74 | dh = rel_codes[:, 3::4] / wh 75 | 76 | # Prevent sending too large values into torch.exp() 77 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 78 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 79 | 80 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 81 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 82 | pred_w = torch.exp(dw) * widths[:, None] 83 | pred_h = torch.exp(dh) * heights[:, None] 84 | 85 | pred_boxes = torch.zeros_like(rel_codes) 86 | # x1 87 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 88 | # y1 89 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 90 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 91 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 92 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 93 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 94 | 95 | return pred_boxes 96 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .detectors import build_detection_model 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/detectors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .generalized_rcnn import GeneralizedRCNN 3 | from .panoptic_fpn import PanopticFPN 4 | 5 | 6 | # we could probably get away with re-using GeneralizedRCNN for panoptic at some point. 7 | _DETECTION_META_ARCHITECTURES = { 8 | "GeneralizedRCNN": GeneralizedRCNN, 9 | "PanopticFPN": PanopticFPN 10 | } 11 | 12 | def build_detection_model(cfg): 13 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 14 | return meta_arch(cfg) 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/generalized_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Implements the Generalized R-CNN framework 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | 9 | from maskrcnn_benchmark.structures.image_list import to_image_list 10 | 11 | from ..backbone import build_backbone 12 | from ..rpn.rpn import build_rpn 13 | from ..roi_heads.roi_heads import build_roi_heads 14 | 15 | 16 | class GeneralizedRCNN(nn.Module): 17 | """ 18 | Main class for Generalized R-CNN. Currently supports boxes and masks. 19 | It consists of three main parts: 20 | - backbone 21 | - rpn 22 | - heads: takes the features + the proposals from the RPN and computes 23 | detections / masks from it. 
24 | """ 25 | 26 | def __init__(self, cfg): 27 | super(GeneralizedRCNN, self).__init__() 28 | 29 | self.backbone = build_backbone(cfg) 30 | self.rpn = build_rpn(cfg) 31 | self.roi_heads = build_roi_heads(cfg) 32 | 33 | def forward(self, images, targets=None): 34 | """ 35 | Arguments: 36 | images (list[Tensor] or ImageList): images to be processed 37 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 38 | 39 | Returns: 40 | result (list[BoxList] or dict[Tensor]): the output from the model. 41 | During training, it returns a dict[Tensor] which contains the losses. 42 | During testing, it returns list[BoxList] contains additional fields 43 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 44 | 45 | """ 46 | if self.training and targets is None: 47 | raise ValueError("In training mode, targets should be passed") 48 | 49 | # usually, it seems this is already an ImageList. 50 | images = to_image_list(images) 51 | features = self.backbone(images.tensors) 52 | 53 | # for panoptic FPN, it seems like we might need to disentangle this so we can 54 | # feed the features to the semantic head. 55 | proposals, proposal_losses = self.rpn(images, features, targets) 56 | if self.roi_heads: 57 | x, result, detector_losses = self.roi_heads(features, proposals, targets) 58 | else: 59 | # RPN-only models don't have roi_heads 60 | x = features 61 | result = proposals 62 | detector_losses = {} 63 | 64 | if self.training: 65 | losses = {} 66 | losses.update(detector_losses) 67 | losses.update(proposal_losses) 68 | return losses 69 | 70 | return result 71 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/make_layers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | from torch.nn import functional as F 9 | from maskrcnn_benchmark.config import cfg 10 | from maskrcnn_benchmark.layers import Conv2d 11 | from maskrcnn_benchmark.layers.misc import DFConv2d 12 | from maskrcnn_benchmark.modeling.poolers import Pooler 13 | 14 | 15 | def get_group_gn(dim, dim_per_gp, num_groups): 16 | """get number of groups used by GroupNorm, based on number of channels.""" 17 | assert dim_per_gp == -1 or num_groups == -1, \ 18 | "GroupNorm: can only specify G or C/G." 
19 | 20 | if dim_per_gp > 0: 21 | assert dim % dim_per_gp == 0, \ 22 | "dim: {}, dim_per_gp: {}".format(dim, dim_per_gp) 23 | group_gn = dim // dim_per_gp 24 | else: 25 | assert dim % num_groups == 0, \ 26 | "dim: {}, num_groups: {}".format(dim, num_groups) 27 | group_gn = num_groups 28 | 29 | return group_gn 30 | 31 | 32 | def group_norm(out_channels, affine=True, divisor=1): 33 | out_channels = out_channels // divisor 34 | dim_per_gp = cfg.MODEL.GROUP_NORM.DIM_PER_GP // divisor 35 | num_groups = cfg.MODEL.GROUP_NORM.NUM_GROUPS // divisor 36 | eps = cfg.MODEL.GROUP_NORM.EPSILON # default: 1e-5 37 | return torch.nn.GroupNorm( 38 | get_group_gn(out_channels, dim_per_gp, num_groups), 39 | out_channels, 40 | eps, 41 | affine 42 | ) 43 | 44 | 45 | def make_conv3x3( 46 | in_channels, 47 | out_channels, 48 | dilation=1, 49 | stride=1, 50 | use_gn=False, 51 | use_relu=False, 52 | kaiming_init=True 53 | ): 54 | conv = Conv2d( 55 | in_channels, 56 | out_channels, 57 | kernel_size=3, 58 | stride=stride, 59 | padding=dilation, 60 | dilation=dilation, 61 | bias=False if use_gn else True 62 | ) 63 | if kaiming_init: 64 | nn.init.kaiming_normal_( 65 | conv.weight, mode="fan_out", nonlinearity="relu" 66 | ) 67 | else: 68 | torch.nn.init.normal_(conv.weight, std=0.01) 69 | if not use_gn: 70 | nn.init.constant_(conv.bias, 0) 71 | module = [conv,] 72 | if use_gn: 73 | module.append(group_norm(out_channels)) 74 | if use_relu: 75 | module.append(nn.ReLU(inplace=True)) 76 | if len(module) > 1: 77 | return nn.Sequential(*module) 78 | return conv 79 | 80 | def make_dfconv3x3( 81 | in_channels, 82 | out_channels, 83 | dilation=1, 84 | stride=1, 85 | use_gn=False, 86 | use_relu=False, 87 | kaiming_init=True 88 | ): 89 | conv = DFConv2d( 90 | in_channels, 91 | out_channels, 92 | kernel_size=3, 93 | stride=stride, 94 | dilation=dilation, 95 | bias=False if use_gn else True 96 | ) 97 | module = [conv,] 98 | if use_gn: 99 | module.append(group_norm(out_channels)) 100 | if use_relu: 101 | module.append(nn.ReLU(inplace=True)) 102 | if len(module) > 1: 103 | return nn.Sequential(*module) 104 | return conv 105 | 106 | 107 | def make_fc(dim_in, hidden_dim, use_gn): 108 | ''' 109 | Caffe2 implementation uses XavierFill, which in fact 110 | corresponds to kaiming_uniform_ in PyTorch 111 | ''' 112 | if use_gn: 113 | fc = nn.Linear(dim_in, hidden_dim, bias=False) 114 | nn.init.kaiming_uniform_(fc.weight, a=1) 115 | return nn.Sequential(fc, group_norm(hidden_dim)) 116 | fc = nn.Linear(dim_in, hidden_dim) 117 | nn.init.kaiming_uniform_(fc.weight, a=1) 118 | nn.init.constant_(fc.bias, 0) 119 | return fc 120 | 121 | 122 | def conv_with_kaiming_uniform(use_gn=False, use_relu=False): 123 | def make_conv( 124 | in_channels, out_channels, kernel_size, stride=1, dilation=1 125 | ): 126 | conv = Conv2d( 127 | in_channels, 128 | out_channels, 129 | kernel_size=kernel_size, 130 | stride=stride, 131 | padding=dilation * (kernel_size - 1) // 2, 132 | dilation=dilation, 133 | bias=False if use_gn else True 134 | ) 135 | # Caffe2 implementation uses XavierFill, which in fact 136 | # corresponds to kaiming_uniform_ in PyTorch 137 | nn.init.kaiming_uniform_(conv.weight, a=1) 138 | if not use_gn: 139 | nn.init.constant_(conv.bias, 0) 140 | module = [conv,] 141 | if use_gn: 142 | module.append(group_norm(out_channels)) 143 | if use_relu: 144 | module.append(nn.ReLU(inplace=True)) 145 | if len(module) > 1: 146 | return nn.Sequential(*module) 147 | return conv 148 | 149 | return make_conv 150 | 
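# A minimal usage sketch for the helpers above (illustrative only: the channel
# sizes are made up, and use_gn is left off so the global cfg's GroupNorm
# settings are never consulted; assumes the package is built and importable).
if __name__ == "__main__":
    block = make_conv3x3(256, 256, use_relu=True)  # Sequential(Conv2d, ReLU)
    x = torch.randn(2, 256, 32, 32)
    y = block(x)  # -> (2, 256, 32, 32): stride=1 and padding=dilation preserve the size

    make_conv = conv_with_kaiming_uniform()  # the factory the FPN receives as its conv_block
    lateral = make_conv(256, 256, 1)  # a 1x1 lateral convolution, bias initialized to 0
    fc = make_fc(256 * 7 * 7, 1024, use_gn=False)  # XavierFill-style (kaiming_uniform_) init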
-------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/poolers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | from maskrcnn_benchmark.layers import ROIAlign 7 | 8 | from .utils import cat 9 | 10 | 11 | class LevelMapper(object): 12 | """Determine which FPN level each RoI in a set of RoIs should map to based 13 | on the heuristic in the FPN paper. 14 | """ 15 | 16 | def __init__(self, k_min, k_max, canonical_scale=224, canonical_level=4, eps=1e-6): 17 | """ 18 | Arguments: 19 | k_min (int) 20 | k_max (int) 21 | canonical_scale (int) 22 | canonical_level (int) 23 | eps (float) 24 | """ 25 | self.k_min = k_min 26 | self.k_max = k_max 27 | self.s0 = canonical_scale 28 | self.lvl0 = canonical_level 29 | self.eps = eps 30 | 31 | def __call__(self, boxlists): 32 | """ 33 | Arguments: 34 | boxlists (list[BoxList]) 35 | """ 36 | # Compute level ids 37 | s = torch.sqrt(cat([boxlist.area() for boxlist in boxlists])) 38 | 39 | # Eqn.(1) in FPN paper 40 | target_lvls = torch.floor(self.lvl0 + torch.log2(s / self.s0 + self.eps)) 41 | target_lvls = torch.clamp(target_lvls, min=self.k_min, max=self.k_max) 42 | return target_lvls.to(torch.int64) - self.k_min 43 | 44 | 45 | class Pooler(nn.Module): 46 | """ 47 | Pooler for Detection with or without FPN. 48 | It currently hard-codes ROIAlign in the implementation, 49 | but that can be made more generic later on. 50 | Also, the requirement of passing the scales is not strictly necessary, as they 51 | can be inferred from the size of the feature map / size of original image, 52 | which is available thanks to the BoxList. 53 | """ 54 | 55 | def __init__(self, output_size, scales, sampling_ratio): 56 | """ 57 | Arguments: 58 | output_size (list[tuple[int]] or list[int]): output size for the pooled region 59 | scales (list[float]): scales for each Pooler 60 | sampling_ratio (int): sampling ratio for ROIAlign 61 | """ 62 | super(Pooler, self).__init__() 63 | poolers = [] 64 | for scale in scales: 65 | poolers.append( 66 | ROIAlign( 67 | output_size, spatial_scale=scale, sampling_ratio=sampling_ratio 68 | ) 69 | ) 70 | self.poolers = nn.ModuleList(poolers) 71 | self.output_size = output_size 72 | # get the levels in the feature map by leveraging the fact that the network always 73 | # downsamples by a factor of 2 at each level. 74 | lvl_min = -torch.log2(torch.tensor(scales[0], dtype=torch.float32)).item() 75 | lvl_max = -torch.log2(torch.tensor(scales[-1], dtype=torch.float32)).item() 76 | self.map_levels = LevelMapper(lvl_min, lvl_max) 77 | 78 | def convert_to_roi_format(self, boxes): 79 | concat_boxes = cat([b.bbox for b in boxes], dim=0) 80 | device, dtype = concat_boxes.device, concat_boxes.dtype 81 | ids = cat( 82 | [ 83 | torch.full((len(b), 1), i, dtype=dtype, device=device) 84 | for i, b in enumerate(boxes) 85 | ], 86 | dim=0, 87 | ) 88 | rois = torch.cat([ids, concat_boxes], dim=1) 89 | return rois 90 | 91 | def forward(self, x, boxes): 92 | """ 93 | Arguments: 94 | x (list[Tensor]): feature maps for each level 95 | boxes (list[BoxList]): boxes to be used to perform the pooling operation.
96 | Returns: 97 | result (Tensor) 98 | """ 99 | num_levels = len(self.poolers) 100 | rois = self.convert_to_roi_format(boxes) 101 | if num_levels == 1: 102 | return self.poolers[0](x[0], rois) 103 | 104 | levels = self.map_levels(boxes) 105 | 106 | num_rois = len(rois) 107 | num_channels = x[0].shape[1] 108 | output_size = self.output_size[0] 109 | 110 | dtype, device = x[0].dtype, x[0].device 111 | result = torch.zeros( 112 | (num_rois, num_channels, output_size, output_size), 113 | dtype=dtype, 114 | device=device, 115 | ) 116 | for level, (per_level_feature, pooler) in enumerate(zip(x, self.poolers)): 117 | idx_in_level = torch.nonzero(levels == level).squeeze(1) 118 | rois_per_level = rois[idx_in_level] 119 | result[idx_in_level] = pooler(per_level_feature, rois_per_level) 120 | 121 | return result 122 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from maskrcnn_benchmark.utils.registry import Registry 4 | 5 | BACKBONES = Registry() 6 | ROI_BOX_FEATURE_EXTRACTORS = Registry() 7 | RPN_HEADS = Registry() 8 | SEMANTIC_HEADS = Registry() 9 | UPSAMPLE_MODULES = Registry() 10 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/modeling/roi_heads/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 6 | from .roi_box_predictors import make_roi_box_predictor 7 | from .inference import make_roi_box_post_processor 8 | from .loss import make_roi_box_loss_evaluator 9 | 10 | class ROIBoxHead(torch.nn.Module): 11 | """ 12 | Generic Box Head class. 13 | """ 14 | 15 | def __init__(self, cfg): 16 | super(ROIBoxHead, self).__init__() 17 | self.feature_extractor = make_roi_box_feature_extractor(cfg) 18 | self.predictor = make_roi_box_predictor(cfg) 19 | self.post_processor = make_roi_box_post_processor(cfg) 20 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 21 | 22 | def forward(self, features, proposals, targets=None): 23 | """ 24 | Arguments: 25 | features (list[Tensor]): feature-maps from possibly several levels 26 | proposals (list[BoxList]): proposal boxes 27 | targets (list[BoxList], optional): the ground-truth targets. 28 | 29 | Returns: 30 | x (Tensor): the result of the feature extractor 31 | proposals (list[BoxList]): during training, the subsampled proposals 32 | are returned. 
During testing, the predicted boxlists are returned 33 | losses (dict[Tensor]): During training, returns the losses for the 34 | head. During testing, returns an empty dict. 35 | """ 36 | 37 | if self.training: 38 | # Faster R-CNN subsamples during training the proposals with a fixed 39 | # positive / negative ratio 40 | with torch.no_grad(): 41 | proposals = self.loss_evaluator.subsample(proposals, targets) 42 | 43 | # extract features that will be fed to the final classifier. The 44 | # feature_extractor generally corresponds to the pooler + heads 45 | x = self.feature_extractor(features, proposals) 46 | # final classifier that converts the features into predictions 47 | class_logits, box_regression = self.predictor(x) 48 | 49 | if not self.training: 50 | result = self.post_processor((class_logits, box_regression), proposals) 51 | return x, result, {} 52 | 53 | loss_classifier, loss_box_reg = self.loss_evaluator( 54 | [class_logits], [box_regression] 55 | ) 56 | return ( 57 | x, 58 | proposals, 59 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 60 | ) 61 | 62 | 63 | def build_roi_box_head(cfg): 64 | """ 65 | Constructs a new box head. 66 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 67 | and make it a parameter in the config 68 | """ 69 | return ROIBoxHead(cfg) 70 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | 4 | 5 | class FastRCNNPredictor(nn.Module): 6 | def __init__(self, config, pretrained=None): 7 | super(FastRCNNPredictor, self).__init__() 8 | 9 | stage_index = 4 10 | stage2_relative_factor = 2 ** (stage_index - 1) 11 | res2_out_channels = config.MODEL.RESNETS.RES2_OUT_CHANNELS 12 | num_inputs = res2_out_channels * stage2_relative_factor 13 | 14 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7) 16 | self.cls_score = nn.Linear(num_inputs, num_classes) 17 | self.bbox_pred = nn.Linear(num_inputs, num_classes * 4) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | cls_logit = self.cls_score(x) 29 | bbox_pred = self.bbox_pred(x) 30 | return cls_logit, bbox_pred 31 | 32 | 33 | class FPNPredictor(nn.Module): 34 | def __init__(self, cfg): 35 | super(FPNPredictor, self).__init__() 36 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 37 | representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM 38 | 39 | self.cls_score = nn.Linear(representation_size, num_classes) 40 | self.bbox_pred = nn.Linear(representation_size, num_classes * 4) 41 | 42 | nn.init.normal_(self.cls_score.weight, std=0.01) 43 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 44 | for l in [self.cls_score, self.bbox_pred]: 45 | nn.init.constant_(l.bias, 0) 46 | 47 | def forward(self, x): 48 | scores = self.cls_score(x) 49 | bbox_deltas = self.bbox_pred(x) 50 | 51 | return scores, bbox_deltas 52 | 53 | 54 | _ROI_BOX_PREDICTOR = { 55 | "FastRCNNPredictor": FastRCNNPredictor, 56 | "FPNPredictor": FPNPredictor, 57 | } 58 | 59 | 60 | def 
make_roi_box_predictor(cfg): 61 | func = _ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 62 | return func(cfg) 63 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | 7 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 8 | from .roi_mask_predictors import make_roi_mask_predictor 9 | from .inference import make_roi_mask_post_processor 10 | from .loss import make_roi_mask_loss_evaluator 11 | 12 | import pdb 13 | 14 | def keep_only_positive_boxes(boxes): 15 | """ 16 | Given a set of BoxList containing the `labels` field, 17 | return a set of BoxList for which `labels > 0`. 18 | 19 | Arguments: 20 | boxes (list of BoxList) 21 | """ 22 | assert isinstance(boxes, (list, tuple)) 23 | assert isinstance(boxes[0], BoxList) 24 | assert boxes[0].has_field("labels") 25 | positive_boxes = [] 26 | positive_inds = [] 27 | num_boxes = 0 28 | for boxes_per_image in boxes: 29 | labels = boxes_per_image.get_field("labels") 30 | inds_mask = labels > 0 31 | inds = inds_mask.nonzero().squeeze(1) 32 | positive_boxes.append(boxes_per_image[inds]) 33 | positive_inds.append(inds_mask) 34 | return positive_boxes, positive_inds 35 | 36 | 37 | class ROIMaskHead(torch.nn.Module): 38 | def __init__(self, cfg): 39 | super(ROIMaskHead, self).__init__() 40 | self.cfg = cfg.clone() 41 | self.feature_extractor = make_roi_mask_feature_extractor(cfg) 42 | self.predictor = make_roi_mask_predictor(cfg) 43 | self.post_processor = make_roi_mask_post_processor(cfg) 44 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 45 | 46 | def forward(self, features, proposals, targets=None): 47 | """ 48 | Arguments: 49 | features (list[Tensor]): feature-maps from possibly several levels 50 | proposals (list[BoxList]): proposal boxes 51 | targets (list[BoxList], optional): the ground-truth targets. 52 | 53 | Returns: 54 | x (Tensor): the result of the feature extractor 55 | proposals (list[BoxList]): during training, the original proposals 56 | are returned. During testing, the predicted boxlists are returned 57 | with the `mask` field set 58 | losses (dict[Tensor]): During training, returns the losses for the 59 | head. During testing, returns an empty dict. 60 | """ 61 | 62 | if self.training: 63 | # during training, only focus on positive boxes 64 | all_proposals = proposals 65 | proposals, positive_inds = keep_only_positive_boxes(proposals) 66 | 67 | # len(proposals) has usually quite dwindled down by now. 
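        # when SHARE_BOX_FEATURE_EXTRACTOR is enabled, `features` are the per-ROI
        # features already computed by the box head, so the positive proposals can
        # simply be indexed out; otherwise the mask head pools its own features
        # from the FPN maps via its feature extractor.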
68 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 69 | x = features 70 | x = x[torch.cat(positive_inds, dim=0)] 71 | else: 72 | x = self.feature_extractor(features, proposals) 73 | 74 | mask_logits, mask_scores = self.predictor(x) 75 | 76 | if not self.training: 77 | result = self.post_processor(mask_logits, proposals, mask_scores) 78 | return x, result, {}, mask_logits, mask_scores 79 | 80 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 81 | 82 | return x, all_proposals, dict(loss_mask=loss_mask), mask_logits, mask_scores 83 | 84 | 85 | def build_roi_mask_head(cfg): 86 | return ROIMaskHead(cfg) 87 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 6 | from maskrcnn_benchmark.modeling.poolers import Pooler 7 | from maskrcnn_benchmark.layers import Conv2d 8 | from maskrcnn_benchmark.modeling.make_layers import make_conv3x3 9 | 10 | 11 | 12 | class MaskRCNNFPNFeatureExtractor(nn.Module): 13 | """ 14 | Feature extractor for the FPN-based mask head 15 | """ 16 | 17 | def __init__(self, cfg): 18 | """ 19 | Arguments: 20 | cfg: the global configuration; the pooler resolution, scales, sampling 21 | ratio and the conv tower layout are read from 22 | cfg.MODEL.ROI_MASK_HEAD 23 | """ 24 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 25 | 26 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 27 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 28 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 29 | pooler = Pooler( 30 | output_size=(resolution, resolution), 31 | scales=scales, 32 | sampling_ratio=sampling_ratio, 33 | ) 34 | input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS 35 | self.pooler = pooler 36 | 37 | use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN 38 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 39 | dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION 40 | 41 | next_feature = input_size 42 | self.blocks = [] 43 | for layer_idx, layer_features in enumerate(layers, 1): 44 | layer_name = "mask_fcn{}".format(layer_idx) 45 | module = make_conv3x3(next_feature, layer_features, 46 | dilation=dilation, stride=1, use_gn=use_gn 47 | ) 48 | self.add_module(layer_name, module) 49 | next_feature = layer_features 50 | self.blocks.append(layer_name) 51 | 52 | def forward(self, x, proposals): 53 | x = self.pooler(x, proposals) 54 | 55 | for layer_name in self.blocks: 56 | x = F.relu(getattr(self, layer_name)(x)) 57 | 58 | return x 59 | 60 | 61 | _ROI_MASK_FEATURE_EXTRACTORS = { 62 | "ResNet50Conv5ROIFeatureExtractor": ResNet50Conv5ROIFeatureExtractor, 63 | "MaskRCNNFPNFeatureExtractor": MaskRCNNFPNFeatureExtractor, 64 | } 65 | 66 | 67 | def make_roi_mask_feature_extractor(cfg): 68 | func = _ROI_MASK_FEATURE_EXTRACTORS[cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR] 69 | return func(cfg) 70 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
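# Two mask predictors are provided below: MaskRCNNC4Predictor emits only mask
# logits (its second return value is None), while MaskRCNNC4WithScorePredictor
# additionally regresses a per-class mask score that the mask head passes to
# its post-processor alongside the logits.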
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | from maskrcnn_benchmark.layers import ConvTranspose2d 7 | 8 | import pdb 9 | 10 | 11 | class MaskRCNNC4Predictor(nn.Module): 12 | def __init__(self, cfg): 13 | super(MaskRCNNC4Predictor, self).__init__() 14 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 16 | 17 | if cfg.MODEL.ROI_HEADS.USE_FPN: 18 | num_inputs = dim_reduced 19 | else: 20 | stage_index = 4 21 | stage2_relative_factor = 2 ** (stage_index - 1) 22 | res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 23 | num_inputs = res2_out_channels * stage2_relative_factor 24 | 25 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 26 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 27 | 28 | for name, param in self.named_parameters(): 29 | if "bias" in name: 30 | nn.init.constant_(param, 0) 31 | elif "weight" in name: 32 | # Caffe2 implementation uses MSRAFill, which in fact 33 | # corresponds to kaiming_normal_ in PyTorch 34 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 35 | 36 | def forward(self, x): 37 | x = F.relu(self.conv5_mask(x)) 38 | 39 | return self.mask_fcn_logits(x), None 40 | 41 | class WithScorePredictor(nn.Module): 42 | def __init__(self, cfg): 43 | super(WithScorePredictor, self).__init__() 44 | 45 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 46 | num_inputs = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 47 | 48 | # do we need more parameters? 49 | self.score_avgpool = nn.AvgPool2d(kernel_size=14, stride=14) 50 | self.mask_score = nn.Linear(num_inputs, num_classes) 51 | 52 | nn.init.normal_(self.mask_score.weight, mean=0, std=0.01) 53 | nn.init.constant_(self.mask_score.bias, 0) 54 | 55 | def forward(self, x): 56 | pool = self.score_avgpool(x) 57 | flat = pool.view(pool.size(0), -1) 58 | return self.mask_score(flat) 59 | 60 | class MaskRCNNC4WithScorePredictor(nn.Module): 61 | def __init__(self, cfg): 62 | super(MaskRCNNC4WithScorePredictor, self).__init__() 63 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 64 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 65 | 66 | if cfg.MODEL.ROI_HEADS.USE_FPN: 67 | num_inputs = dim_reduced 68 | else: 69 | stage_index = 4 70 | stage2_relative_factor = 2 ** (stage_index - 1) 71 | res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 72 | num_inputs = res2_out_channels * stage2_relative_factor 73 | 74 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 75 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 76 | self.score_predictor = WithScorePredictor(cfg) 77 | 78 | for name, param in self.named_parameters(): 79 | if "bias" in name: 80 | nn.init.constant_(param, 0) 81 | elif "weight" in name: 82 | # Caffe2 implementation uses MSRAFill, which in fact 83 | # corresponds to kaiming_normal_ in PyTorch 84 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 85 | 86 | def forward(self, x): 87 | y = self.score_predictor(x) 88 | x = F.relu(self.conv5_mask(x)) 89 | 90 | return self.mask_fcn_logits(x), y 91 | 92 | _ROI_MASK_PREDICTOR = {"MaskRCNNC4Predictor": MaskRCNNC4Predictor, "MaskRCNNC4WithScorePredictor": MaskRCNNC4WithScorePredictor} 93 | 94 | def make_roi_mask_predictor(cfg): 95 | func = _ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 96 | return func(cfg) 97 | -------------------------------------------------------------------------------- 
/maskrcnn_benchmark/modeling/roi_heads/order_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/modeling/roi_heads/order_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/order_head/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import pycocotools.mask as mask_util 4 | import torch 5 | 6 | import pdb 7 | 8 | def prepare_mask_intersection_matrix(boxes, hard_masks): 9 | import pycocotools.mask as mask_util 10 | 11 | rles = [ 12 | mask_util.encode(np.array(hard_mask[0, :, :, np.newaxis], order="F"))[0] 13 | for hard_mask in hard_masks 14 | ] 15 | 16 | iscrowd = [] 17 | for rle in rles: 18 | rle["counts"] = rle["counts"].decode("utf-8") 19 | iscrowd.append(0) 20 | 21 | # quickly find those that intersect at all. 22 | iou = mask_util.iou(rles, rles, iscrowd) 23 | number_masks = len(rles) 24 | 25 | # zeros prevents diagonals from ever being considered. 26 | intersect_ratio = np.zeros_like(iou, dtype=np.float32) 27 | 28 | # note, these masks are variable size. unsure whether to do this altogether. 29 | #intersect_bbox = np.zeros((number_masks, number_masks, 4), dtype=np.int32) 30 | 31 | for from_index in range(intersect_ratio.shape[0]): 32 | from_rle = rles[from_index] 33 | from_area = mask_util.area(from_rle) 34 | 35 | # only need the upper triangle. 36 | for to_index in range(from_index + 1, intersect_ratio.shape[1]): 37 | to_rle = rles[to_index] 38 | to_area = mask_util.area(to_rle) 39 | 40 | # this is symmetric. 41 | merged = mask_util.merge([from_rle, to_rle], intersect=True) 42 | 43 | # do we compute the bbox here or wait for later? add some padding? 44 | #merged_bbox = mask_util.toBbox(merged) 45 | #intersect_bbox[from_index, to_index] = merged_bbox[0] 46 | #intersect_bbox[to_index, from_index] = merged_bbox[0] 47 | 48 | from_to_intersect = float(mask_util.area(merged)) 49 | 50 | from_ratio = from_to_intersect / (from_area + 0.0001) 51 | to_ratio = from_to_intersect / (to_area + 0.0001) 52 | 53 | # from -> to. 54 | intersect_ratio[from_index, to_index] = from_ratio 55 | intersect_ratio[to_index, from_index] = to_ratio 56 | 57 | intersect_ratio = torch.tensor(intersect_ratio).to(boxes.bbox.device) 58 | #intersect_bbox = torch.tensor(intersect_bbox).to(boxes.bbox.device) 59 | 60 | return intersect_ratio#, intersect_bbox 61 | 62 | def filter_actual_overlaps(target, matching, proposal_pairs): 63 | if not target.has_field("overlaps"): 64 | raise ValueError("overlaps do not exist on target") 65 | 66 | # from the "overlap" ground truth, we can infer the ground truth class that survives the overlap. 
67 | overlaps = target.get_field("overlaps") 68 | 69 | first_idxs = torch.unsqueeze(matching[proposal_pairs[:, 0]], dim=1) 70 | second_idxs = torch.unsqueeze(matching[proposal_pairs[:, 1]], dim=1) 71 | 72 | selected_overlaps = overlaps[first_idxs, second_idxs] 73 | 74 | actual_overlaps = selected_overlaps >= 0 75 | mask_of_overlaps = torch.nonzero(actual_overlaps)[:, 0] 76 | 77 | return mask_of_overlaps, selected_overlaps 78 | 79 | def subsample_actual_overlaps(mask_of_overlaps, maximum_per_image=None): 80 | if maximum_per_image is None: 81 | return mask_of_overlaps 82 | 83 | number_masked = mask_of_overlaps.shape[0] 84 | subsample_size = min(maximum_per_image, number_masked) 85 | subsample_perm = torch.randperm(number_masked, device=mask_of_overlaps.device)[:subsample_size] 86 | 87 | return mask_of_overlaps[subsample_perm] 88 | 89 | def compute_overlap_matrix(target): 90 | width, height = target.size 91 | masks = target.get_field("masks") 92 | 93 | # decode each instance to a binary mask so the call below matches the 94 | # (boxes, hard_masks) signature of prepare_mask_intersection_matrix above; 95 | # the previous call passed raw RLEs and an extra argument. 96 | hard_masks = [] 97 | for poly in masks.polygons: 98 | encoded = mask_util.frPyObjects(poly.polygons, height, width) 99 | # merge the polygon pieces of one instance (encoded[0] kept only the first piece). 100 | hard_masks.append(mask_util.decode(mask_util.merge(encoded))[np.newaxis, :, :]) 101 | 102 | return prepare_mask_intersection_matrix(target, hard_masks) 103 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/order_head/roi_order_feature_extractors.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | 7 | class OrderHeadFeatureExtractor(nn.Module): 8 | def __init__(self, cfg): 9 | super(OrderHeadFeatureExtractor, self).__init__() 10 | 11 | self.cfg = cfg.clone() 12 | 13 | # paired. 14 | input_channels = self.cfg.MODEL.ROI_ORDER_HEAD.NUMBER_INPUT_CHANNELS 15 | number_channels = 512 16 | 17 | self.conv1 = Conv2d(input_channels, number_channels, 3, 1, 1) 18 | self.conv2 = Conv2d(number_channels, number_channels, 3, 1, 1) 19 | self.conv3 = Conv2d(number_channels, number_channels, 3, 1, 1) 20 | 21 | # stride 2.
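# assuming the paired ROI features enter at 14x14, this stride-2 conv halves them to
# 7x7 so the flattened size matches the 512 * 7 * 7 input of the order predictors' fc1.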
22 | self.conv4 = Conv2d(number_channels, number_channels, 3, 2, 1) 23 | 24 | for l in [self.conv1, self.conv2, self.conv3, self.conv4]: 25 | nn.init.kaiming_normal_(l.weight, mode="fan_out", nonlinearity="relu") 26 | nn.init.constant_(l.bias, 0) 27 | 28 | def forward(self, x): 29 | x = F.relu(self.conv1(x)) 30 | x = F.relu(self.conv2(x)) 31 | x = F.relu(self.conv3(x)) 32 | x = F.relu(self.conv4(x)) 33 | 34 | return x 35 | 36 | def make_roi_order_feature_extractor(cfg): 37 | return OrderHeadFeatureExtractor(cfg) 38 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/order_head/roi_order_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from maskrcnn_benchmark.layers import Conv2d 5 | 6 | class OrderPredictor(nn.Module): 7 | def __init__(self, cfg): 8 | super(OrderPredictor, self).__init__() 9 | 10 | self.fc1 = nn.Linear(512 * 7 * 7, 1024) 11 | self.project = nn.Linear(1024, 1) 12 | 13 | nn.init.kaiming_uniform_(self.fc1.weight, a=1) 14 | nn.init.constant_(self.fc1.bias, 0) 15 | 16 | nn.init.normal_(self.project.weight, mean=0, std=0.01) 17 | nn.init.constant_(self.project.bias, 0) 18 | 19 | def forward(self, x): 20 | x = x.view(x.size(0), -1) 21 | x = F.relu(self.fc1(x)) 22 | x = self.project(x) 23 | 24 | return x 25 | 26 | class OrderPredictorThing(nn.Module): 27 | def __init__(self, cfg): 28 | super(OrderPredictorThing, self).__init__() 29 | 30 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 31 | 32 | # not sure about this one. 33 | self.fc1 = nn.Linear(512 * 7 * 7, 1024) 34 | self.project = nn.Linear(1024, num_classes) 35 | 36 | nn.init.kaiming_uniform_(self.fc1.weight, a=1) 37 | nn.init.constant_(self.fc1.bias, 0) 38 | 39 | nn.init.normal_(self.project.weight, mean=0, std=0.01) 40 | nn.init.constant_(self.project.bias, 0) 41 | 42 | def forward(self, x): 43 | x = x.view(x.size(0), -1) 44 | x = F.relu(self.fc1(x)) 45 | x = self.project(x) 46 | 47 | return x 48 | 49 | _ROI_ORDER_PREDICTOR = { 50 | "OrderPredictor": OrderPredictor, 51 | "OrderPredictorThing": OrderPredictorThing 52 | } 53 | 54 | def make_roi_order_predictor(cfg): 55 | func = _ROI_ORDER_PREDICTOR["OrderPredictor"] 56 | return func(cfg) 57 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/roi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .box_head.box_head import build_roi_box_head 5 | from .mask_head.mask_head import build_roi_mask_head 6 | from .order_head.order_head import build_roi_order_head 7 | 8 | import numpy as np 9 | import pdb 10 | 11 | class CombinedROIHeads(torch.nn.ModuleDict): 12 | """ 13 | Combines a set of individual heads (for box prediction or masks) into a single 14 | head. 
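Heads are stored by name ("box", "mask", "order") in the underlying ModuleDict
and run in sequence by forward(), each consuming the detections of the previous one.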
15 | """ 16 | 17 | def __init__(self, cfg, heads): 18 | super(CombinedROIHeads, self).__init__(heads) 19 | self.cfg = cfg.clone() 20 | if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 21 | self.mask.feature_extractor = self.box.feature_extractor 22 | 23 | self.feed_ground_truth_instances = cfg.TEST.FEED_GROUND_TRUTH_INSTANCES 24 | 25 | def forward(self, features, proposals, targets=None, semantic_targets=None): 26 | losses = {} 27 | mask_logits = None 28 | 29 | # TODO rename x to roi_box_features, if it doesn't increase memory consumption 30 | x, detections, loss_box = self.box(features, proposals, targets) 31 | losses.update(loss_box) 32 | 33 | # replace the detections once again. 34 | if self.feed_ground_truth_instances: 35 | detections = proposals 36 | 37 | if self.cfg.MODEL.MASK_ON: 38 | mask_features = features 39 | # optimization: during training, if we share the feature extractor between 40 | # the box and the mask heads, then we can reuse the features already computed 41 | if ( 42 | self.training 43 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 44 | ): 45 | mask_features = x 46 | 47 | # During training, self.box() will return the unaltered proposals as "detections" 48 | # this makes the API consistent during training and testing 49 | x, detections, loss_mask, mask_logits, mask_scores = self.mask(mask_features, detections, targets) 50 | losses.update(loss_mask) 51 | 52 | if self.cfg.MODEL.ORDER_ON: 53 | # mask head does this in a nicer way by "sharing" the feature extractor and 54 | # opting not to compute it again at training time. TODO! 55 | order_features = features 56 | if self.cfg.MODEL.ROI_ORDER_HEAD.SHARE_MASK_FEATURE_EXTRACTOR: 57 | order_features = x 58 | 59 | loss_order = self.order(order_features, detections, mask_logits, targets) 60 | losses.update(loss_order) 61 | 62 | return x, detections, losses 63 | 64 | def build_roi_heads(cfg): 65 | # individually create the heads, that will be combined together 66 | # afterwards 67 | roi_heads = [] 68 | if not cfg.MODEL.RPN_ONLY: 69 | roi_heads.append(("box", build_roi_box_head(cfg))) 70 | if cfg.MODEL.MASK_ON: 71 | roi_heads.append(("mask", build_roi_mask_head(cfg))) 72 | if cfg.MODEL.ORDER_ON: 73 | roi_heads.append(("order", build_roi_order_head(cfg))) 74 | 75 | # combine individual heads in a single module 76 | if roi_heads: 77 | roi_heads = CombinedROIHeads(cfg, roi_heads) 78 | 79 | return roi_heads 80 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/segmenter/__init__.py: -------------------------------------------------------------------------------- 1 | from .segmenters import build_segmentation_model 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/segmenter/segmenters.py: -------------------------------------------------------------------------------- 1 | from .semantic_fpn import SemanticFPN 2 | 3 | _SEGMENTATION_META_ARCHITECTURES = { 4 | "SemanticFPN": SemanticFPN 5 | } 6 | 7 | def build_segmentation_model(cfg): 8 | meta_arch = _SEGMENTATION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 9 | return meta_arch(cfg) 10 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/segmenter/semantic_fpn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Semantic FPN 3 | """ 4 | 5 | import torch 6 | from torch import nn 7 | 8 | from maskrcnn_benchmark.structures.image_list import to_image_list 9 | 10 | from ..backbone import build_backbone 11 | from ..semantic.segmentation import build_semantic_segmentation 12 | 13 | import pdb 14 | 15 | class SemanticFPN(nn.Module): 16 | """ 17 | Main class for Semantic FPN. Takes a semantic target. 18 | It consists of two main parts: a backbone and a 19 | semantic segmentation head. 20 | """ 21 | 22 | def __init__(self, cfg): 23 | super(SemanticFPN, self).__init__() 24 | 25 | self.backbone = build_backbone(cfg) 26 | self.semantic_segmentation = build_semantic_segmentation(cfg) 27 | 28 | def forward(self, images, targets=None): 29 | """ 30 | Arguments: 31 | images (list[Tensor] or ImageList): images to be processed 32 | targets (list[SemanticSegmentation]): ground-truth segmentation in the image (optional) 33 | 34 | Returns: 35 | result (list[Tensor] or dict[Tensor]): the output from the model. 36 | During training, it returns a dict[Tensor] which contains the losses. 37 | During testing, it returns a list[Tensor] containing the segmentation images. 38 | 39 | """ 40 | if self.training and targets is None: 41 | raise ValueError("In training mode, targets should be passed") 42 | 43 | # usually, it seems this is already an ImageList. 44 | images = to_image_list(images) 45 | 46 | # note that these are already run through FPN if FPN is included.
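# with an FPN backbone, features is a tuple of per-level maps rather than a
# single tensor; the semantic head is expected to fuse these levels itself.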
47 | features = self.backbone(images.tensors) 48 | 49 | semantic_targets = [f.segmentation for f in targets] if targets is not None else None # guard inference, where no targets are given. 50 | mask, semantic_losses = self.semantic_segmentation(images, features, semantic_targets) 51 | 52 | if self.training: 53 | losses = {} 54 | losses.update(semantic_losses) 55 | return losses 56 | 57 | return mask 58 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/semantic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/modeling/semantic/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/semantic/loss.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains specific functions for computing losses from 3 | semantic segmentation 4 | """ 5 | 6 | import torch 7 | from torch.nn import functional as F 8 | 9 | from maskrcnn_benchmark.modeling.utils import cat 10 | import pdb 11 | 12 | class SemanticLossComputation(object): 13 | """ 14 | This class computes the semantic segmentation loss. 15 | """ 16 | 17 | def __init__(self): 18 | pass 19 | 20 | def __call__(self, class_logits, targets): 21 | """ 22 | Arguments: 23 | class_logits (Tensor) 24 | targets (list[SemanticSegmentation]) 25 | 26 | Returns: 27 | semantic_loss (Tensor) 28 | """ 29 | 30 | # "class_logits" may carry right/bottom zero padding from batching. pad 31 | # the targets to match and mark the padded pixels as unlabeled on a 32 | # per-image basis. 33 | labels = [target.mask for target in targets] 34 | has_label_mask = [target.has_label_mask for target in targets] 35 | 36 | # todo, easier way? 37 | for i in range(len(targets)): 38 | given_shape = targets[i].mask.shape 39 | padded_shape = class_logits[i].shape[1:] 40 | extra_bottom = padded_shape[0] - given_shape[0] 41 | extra_right = padded_shape[1] - given_shape[1] 42 | 43 | labels[i] = torch.unsqueeze(F.pad(labels[i], (0, extra_right, 0, extra_bottom), value=0), dim=0) 44 | has_label_mask[i] = torch.unsqueeze(F.pad(has_label_mask[i], (0, extra_right, 0, extra_bottom), value=0), dim=0) 45 | 46 | labels = cat(labels, dim=0).long() 47 | has_label_mask = cat(has_label_mask, dim=0).float() 48 | 49 | # this might be interesting to play with when the ontology is dense. 50 | # e.g. do we penalize getting a "stuff" class when a "thing" is present in a different 51 | # manner to getting a _wrong_ "thing" class? 52 | classification_loss = F.cross_entropy(class_logits, labels, reduction="none") 53 | 54 | # multiply by the "has label" mask 55 | masked_classification_loss = has_label_mask * classification_loss 56 | mean_classification_loss = torch.mean(masked_classification_loss) 57 | 58 | return mean_classification_loss 59 | 60 | def make_semantic_loss_evaluator(cfg): 61 | loss_evaluator = SemanticLossComputation() 62 | return loss_evaluator 63 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_optimizer 3 | from .build import make_lr_scheduler 4 | from .lr_scheduler import WarmupMultiStepLR 5 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import pdb 4 | 5 | from .lr_scheduler import WarmupMultiStepLR 6 | 7 | 8 | def make_optimizer(cfg, model): 9 | params = [] 10 | 11 | order_only = cfg.MODEL.ORDER_ON and cfg.MODEL.ROI_ORDER_HEAD.ONLY_TRAIN 12 | for key, value in model.named_parameters(): 13 | if order_only and not ("roi_heads.order" in key): 14 | print("turning off {0} due to order head only".format(key)) 15 | value.requires_grad = False 16 | 17 | if not value.requires_grad: 18 | continue 19 | lr = cfg.SOLVER.BASE_LR 20 | 21 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 22 | if "bias" in key: 23 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 24 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 25 | 26 | add_params = { 27 | "params": [value], 28 | "lr": lr, 29 | "weight_decay": weight_decay 30 | } 31 | 32 | # FIX. 33 | if cfg.SOLVER.RESUME_ITER > 0: 34 | add_params["initial_lr"] = cfg.SOLVER.BASE_LR 35 | 36 | params += [add_params] 37 | 38 | optimizer = torch.optim.SGD(params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM) 39 | return optimizer 40 | 41 | def make_lr_scheduler(cfg, optimizer): 42 | last_epoch = -1 43 | if cfg.SOLVER.RESUME_ITER > 0: 44 | last_epoch = cfg.SOLVER.RESUME_ITER 45 | 46 | return WarmupMultiStepLR( 47 | optimizer, 48 | cfg.SOLVER.STEPS, 49 | cfg.SOLVER.GAMMA, 50 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 51 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 52 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 53 | last_epoch=last_epoch 54 | ) 55 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | 4 | import torch 5 | 6 | 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 8 | # separating MultiStepLR with WarmupLR 9 | # but the current LRScheduler design doesn't allow it 10 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer, 14 | milestones, 15 | gamma=0.1, 16 | warmup_factor=1.0 / 3, 17 | warmup_iters=500, 18 | warmup_method="linear", 19 | last_epoch=-1, 20 | ): 21 | if not list(milestones) == sorted(milestones): 22 | raise ValueError( 23 | "Milestones should be a list of" " increasing integers. 
Got {}", 24 | milestones, 25 | ) 26 | 27 | if warmup_method not in ("constant", "linear"): 28 | raise ValueError( 29 | "Only 'constant' or 'linear' warmup_method accepted" 30 | "got {}".format(warmup_method) 31 | ) 32 | self.milestones = milestones 33 | self.gamma = gamma 34 | self.warmup_factor = warmup_factor 35 | self.warmup_iters = warmup_iters 36 | self.warmup_method = warmup_method 37 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 38 | 39 | def get_lr(self): 40 | warmup_factor = 1 41 | if self.last_epoch < self.warmup_iters: 42 | if self.warmup_method == "constant": 43 | warmup_factor = self.warmup_factor 44 | elif self.warmup_method == "linear": 45 | alpha = float(self.last_epoch) / self.warmup_iters 46 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 47 | return [ 48 | base_lr 49 | * warmup_factor 50 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 51 | for base_lr in self.base_lrs 52 | ] 53 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/structures/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/boxlist_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .bounding_box import BoxList 5 | 6 | from maskrcnn_benchmark.layers import nms as _box_nms 7 | 8 | 9 | def boxlist_nms(boxlist, nms_thresh, max_proposals=-1, score_field="score"): 10 | """ 11 | Performs non-maximum suppression on a boxlist, with scores specified 12 | in a boxlist field via score_field. 13 | 14 | Arguments: 15 | boxlist(BoxList) 16 | nms_thresh (float) 17 | max_proposals (int): if > 0, then only the top max_proposals are kept 18 | after non-maxium suppression 19 | score_field (str) 20 | """ 21 | if nms_thresh <= 0: 22 | return boxlist 23 | mode = boxlist.mode 24 | boxlist = boxlist.convert("xyxy") 25 | boxes = boxlist.bbox 26 | score = boxlist.get_field(score_field) 27 | keep = _box_nms(boxes, score, nms_thresh) 28 | if max_proposals > 0: 29 | keep = keep[: max_proposals] 30 | boxlist = boxlist[keep] 31 | return boxlist.convert(mode) 32 | 33 | 34 | def remove_small_boxes(boxlist, min_size): 35 | """ 36 | Only keep boxes with both sides >= min_size 37 | 38 | Arguments: 39 | boxlist (Boxlist) 40 | min_size (int) 41 | """ 42 | # TODO maybe add an API for querying the ws / hs 43 | xywh_boxes = boxlist.convert("xywh").bbox 44 | _, _, ws, hs = xywh_boxes.unbind(dim=1) 45 | keep = ( 46 | (ws >= min_size) & (hs >= min_size) 47 | ).nonzero().squeeze(1) 48 | return boxlist[keep] 49 | 50 | 51 | # implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py 52 | # with slight modifications 53 | def boxlist_iou(boxlist1, boxlist2): 54 | """Compute the intersection over union of two set of boxes. 55 | The box order must be (xmin, ymin, xmax, ymax). 56 | 57 | Arguments: 58 | box1: (BoxList) bounding boxes, sized [N,4]. 59 | box2: (BoxList) bounding boxes, sized [M,4]. 60 | 61 | Returns: 62 | (tensor) iou, sized [N,M]. 
63 | 64 | Reference: 65 | https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py 66 | """ 67 | if boxlist1.size != boxlist2.size: 68 | raise RuntimeError( 69 | "boxlists should have same image size, got {}, {}".format(boxlist1, boxlist2)) 70 | 71 | N = len(boxlist1) 72 | M = len(boxlist2) 73 | 74 | area1 = boxlist1.area() 75 | area2 = boxlist2.area() 76 | 77 | box1, box2 = boxlist1.bbox, boxlist2.bbox 78 | 79 | lt = torch.max(box1[:, None, :2], box2[:, :2]) # [N,M,2] 80 | rb = torch.min(box1[:, None, 2:], box2[:, 2:]) # [N,M,2] 81 | 82 | TO_REMOVE = 1 83 | 84 | wh = (rb - lt + TO_REMOVE).clamp(min=0) # [N,M,2] 85 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 86 | 87 | iou = inter / (area1[:, None] + area2 - inter) 88 | return iou 89 | 90 | def boxlist_intersect(boxlist1, boxlist2): 91 | """Compute the intersection of two set of boxes. 92 | The box order must be (xmin, ymin, xmax, ymax). 93 | 94 | Arguments: 95 | box1: (BoxList) bounding boxes, sized [N,4]. 96 | box2: (BoxList) bounding boxes, sized [M,4]. 97 | 98 | Returns: 99 | (tensor) inter, sized [N,M]. 100 | 101 | Reference: 102 | https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py 103 | """ 104 | if boxlist1.size != boxlist2.size: 105 | raise RuntimeError( 106 | "boxlists should have same image size, got {}, {}".format(boxlist1, boxlist2)) 107 | 108 | N = len(boxlist1) 109 | M = len(boxlist2) 110 | 111 | box1, box2 = boxlist1.bbox, boxlist2.bbox 112 | 113 | lt = torch.max(box1[:, None, :2], box2[:, :2]) # [N,M,2] 114 | rb = torch.min(box1[:, None, 2:], box2[:, 2:]) # [N,M,2] 115 | 116 | TO_REMOVE = 1 117 | 118 | wh = (rb - lt + TO_REMOVE).clamp(min=0) # [N,M,2] 119 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 120 | 121 | return inter 122 | 123 | # TODO redundant, remove 124 | def _cat(tensors, dim=0): 125 | """ 126 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 127 | """ 128 | assert isinstance(tensors, (list, tuple)) 129 | if len(tensors) == 1: 130 | return tensors[0] 131 | return torch.cat(tensors, dim) 132 | 133 | 134 | def cat_boxlist(bboxes): 135 | """ 136 | Concatenates a list of BoxList (having the same image size) into a 137 | single BoxList 138 | 139 | Arguments: 140 | bboxes (list[BoxList]) 141 | """ 142 | assert isinstance(bboxes, (list, tuple)) 143 | assert all(isinstance(bbox, BoxList) for bbox in bboxes) 144 | 145 | size = bboxes[0].size 146 | assert all(bbox.size == size for bbox in bboxes) 147 | 148 | mode = bboxes[0].mode 149 | assert all(bbox.mode == mode for bbox in bboxes) 150 | 151 | fields = set(bboxes[0].fields()) 152 | assert all(set(bbox.fields()) == fields for bbox in bboxes) 153 | 154 | cat_boxes = BoxList(_cat([bbox.bbox for bbox in bboxes], dim=0), size, mode) 155 | 156 | for field in fields: 157 | data = _cat([bbox.get_field(field) for bbox in bboxes], dim=0) 158 | cat_boxes.add_field(field, data) 159 | 160 | return cat_boxes 161 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 
11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 33 | When tensors is an iterable of Tensors, it pads 34 | the Tensors with zeros so that they have the same 35 | shape 36 | """ 37 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 38 | tensors = [tensors] 39 | 40 | if isinstance(tensors, ImageList): 41 | return tensors 42 | elif isinstance(tensors, torch.Tensor): 43 | # single tensor shape can be inferred 44 | assert tensors.dim() == 4 45 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 46 | return ImageList(tensors, image_sizes) 47 | elif isinstance(tensors, (tuple, list)): 48 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 49 | 50 | # TODO Ideally, just remove this and let the model handle arbitrary 51 | # input sizes 52 | if size_divisible > 0: 53 | import math 54 | 55 | stride = size_divisible 56 | max_size = list(max_size) 57 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 58 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 59 | max_size = tuple(max_size) 60 | 61 | batch_shape = (len(tensors),) + max_size 62 | batched_imgs = tensors[0].new(*batch_shape).zero_() 63 | for img, pad_img in zip(tensors, batched_imgs): 64 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 65 | 66 | image_sizes = [im.shape[-2:] for im in tensors] 67 | 68 | return ImageList(batched_imgs, image_sizes) 69 | else: 70 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 71 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/panoptic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import PIL 3 | import torch 4 | 5 | import pdb 6 | 7 | from PIL import Image 8 | 9 | FLIP_LEFT_RIGHT = 0 10 | FLIP_TOP_BOTTOM = 1 11 | RGB_BASE = 256 12 | 13 | class SemanticSegmentation(object): 14 | # mask is a mask of class IDs, has_label_mask is binary for whether 15 | # the pixel is labeled. 16 | def __init__(self, mask, has_label_mask): 17 | self.mask = mask 18 | self.has_label_mask = has_label_mask 19 | # will Mask RCNN put the previous on the GPU and then call augmentation methods? 20 | 21 | def to(self, device): 22 | return SemanticSegmentation( 23 | mask=self.mask.to(device), 24 | has_label_mask=self.has_label_mask.to(device) if not (self.has_label_mask is None) else None) 25 | 26 | def resize(self, size, *args, **kwargs): 27 | if (self.mask.device.type != 'cpu') and (self.has_label_mask.device.type != 'cpu'): 28 | raise Exception('expected tensors to be on the CPU for resizing') 29 | 30 | # todo, put this all on the GPU/CPU without going back to PIL.
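# NEAREST resampling is essential here: the mask holds categorical class ids, and any
# interpolation that averages neighboring pixels would fabricate labels.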
31 | mask_np = self.mask.numpy().astype(np.uint8) 32 | mask_img = Image.fromarray(mask_np) 33 | resized_mask_img = mask_img.resize(size, resample=PIL.Image.NEAREST) 34 | 35 | if not (self.has_label_mask is None): 36 | has_label_mask_np = self.has_label_mask.numpy().astype(np.uint8) 37 | has_label_mask_img = Image.fromarray(has_label_mask_np) 38 | resized_has_label_mask_img = has_label_mask_img.resize(size, resample=PIL.Image.NEAREST) 39 | resized_has_label_mask = torch.tensor(np.array(resized_has_label_mask_img).astype(np.int32)) 40 | else: 41 | resized_has_label_mask = None 42 | 43 | # seems like this should be consistent, but a bit unsure. 44 | return SemanticSegmentation( 45 | mask=torch.tensor(np.array(resized_mask_img).astype(np.int32)), 46 | has_label_mask=resized_has_label_mask) 47 | 48 | def transpose(self, method): 49 | if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM): 50 | raise NotImplementedError( 51 | "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented" 52 | ) 53 | 54 | flip_dim = [1] if method == FLIP_LEFT_RIGHT else [0] 55 | flipped_mask = torch.flip(self.mask, flip_dim) 56 | # mirror resize(): has_label_mask may be None. 57 | flipped_has_label_mask = torch.flip(self.has_label_mask, flip_dim) if self.has_label_mask is not None else None 58 | 59 | return SemanticSegmentation(mask=flipped_mask, has_label_mask=flipped_has_label_mask) 60 | 61 | def crop(self, box): 62 | cropped_mask = self.mask[box[1]:box[3], box[0]:box[2]] 63 | # mirror resize(): has_label_mask may be None. 64 | cropped_has_label_mask = self.has_label_mask[box[1]:box[3], box[0]:box[2]] if self.has_label_mask is not None else None 65 | 66 | return SemanticSegmentation(mask=cropped_mask, has_label_mask=cropped_has_label_mask) 67 | 68 | class PanopticTarget(object): 69 | # region: Mask RCNN object information 70 | # segmentation: Semantic segmentation class map. 71 | def __init__(self, region, segmentation): 72 | self.region = region 73 | self.segmentation = segmentation 74 | 75 | def to(self, device): 76 | return PanopticTarget( 77 | region=self.region.to(device), 78 | segmentation=self.segmentation.to(device)) 79 | 80 | def clip_to_image(self, remove_empty=True): 81 | # clip the bounding box. 82 | self.region = self.region.clip_to_image(remove_empty=remove_empty) 83 | 84 | # there is nothing to clip for the segmentation itself. 85 | return self 86 | 87 | def resize(self, size, *args, **kwargs): 88 | return PanopticTarget( 89 | region=self.region.resize(size, *args, **kwargs), 90 | segmentation=self.segmentation.resize(size, *args, **kwargs)) 91 | 92 | def transpose(self, method): 93 | return PanopticTarget( 94 | region=self.region.transpose(method), 95 | segmentation=self.segmentation.transpose(method)) 96 | 97 | def crop(self, box): 98 | return PanopticTarget( 99 | region=self.region.crop(box), 100 | segmentation=self.segmentation.crop(box)) 101 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system.
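For example, the `Registry` helper in `utils/registry.py` can back a small config-keyed factory (a sketch; the registry and builder names below are made up):

    from maskrcnn_benchmark.utils.registry import Registry

    BACKBONES = Registry()  # hypothetical registry, for illustration only

    @BACKBONES.register("toy")
    def build_toy_backbone(cfg):
        return ...

    # in real code the key would come from a config value rather than a literal.
    build = BACKBONES["toy"]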
6 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/utils/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import PIL 3 | 4 | from torch.utils.collect_env import get_pretty_env_info 5 | 6 | 7 | def get_pil_version(): 8 | return "\n Pillow ({})".format(PIL.__version__) 9 | 10 | 11 | def collect_env_info(): 12 | env_str = get_pretty_env_info() 13 | env_str += get_pil_version() 14 | return env_str 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for cv2 utility functions and maintaining version compatibility 3 | between 3.x and 4.x 4 | """ 5 | import cv2 6 | 7 | 8 | def findContours(*args, **kwargs): 9 | """ 10 | Wraps cv2.findContours to maintain compatiblity between versions 11 | 3 and 4 12 | 13 | Returns: 14 | contours, hierarchy 15 | """ 16 | if cv2.__version__.startswith('4'): 17 | contours, hierarchy = cv2.findContours(*args, **kwargs) 18 | elif cv2.__version__.startswith('3'): 19 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 20 | else: 21 | raise AssertionError( 22 | 'cv2 must be either version 3 or 4 to call this method') 23 | 24 | return contours, hierarchy 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | 4 | from maskrcnn_benchmark.utils.imports import import_file 5 | 6 | 7 | def setup_environment(): 8 | """Perform environment setup work. The default setup is a no-op, but this 9 | function allows the user to specify a Python source file that performs 10 | custom setup work that may be necessary to their computing environment. 11 | """ 12 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 13 | if custom_module_path: 14 | setup_custom_environment(custom_module_path) 15 | else: 16 | # The default setup is a no-op 17 | pass 18 | 19 | 20 | def setup_custom_environment(custom_module_path): 21 | """Load custom environment setup from a Python source file and run the setup 22 | function. 23 | """ 24 | module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) 25 | assert hasattr(module, "setup_environment") and callable( 26 | module.setup_environment 27 | ), ( 28 | "Custom environment module defined in {} does not have the " 29 | "required callable attribute 'setup_environment'." 30 | ).format( 31 | custom_module_path 32 | ) 33 | module.setup_environment() 34 | 35 | 36 | # Force environment setup when this module is imported 37 | setup_environment() 38 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/imports.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | if torch._six.PY3: 5 | import importlib 6 | import importlib.util 7 | import sys 8 | 9 | 10 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 11 | def import_file(module_name, file_path, make_importable=False): 12 | spec = importlib.util.spec_from_file_location(module_name, file_path) 13 | module = importlib.util.module_from_spec(spec) 14 | spec.loader.exec_module(module) 15 | if make_importable: 16 | sys.modules[module_name] = module 17 | return module 18 | else: 19 | import imp 20 | 21 | def import_file(module_name, file_path, make_importable=None): 22 | module = imp.load_source(module_name, file_path) 23 | return module 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | 7 | def setup_logger(name, save_dir, distributed_rank): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | ch = logging.StreamHandler(stream=sys.stdout) 14 | ch.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 16 | ch.setFormatter(formatter) 17 | logger.addHandler(ch) 18 | 19 | if save_dir: 20 | fh = logging.FileHandler(os.path.join(save_dir, "log.txt")) 21 | fh.setLevel(logging.DEBUG) 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | 25 | return logger 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | 7 | 8 | class SmoothedValue(object): 9 | """Track a series of values and provide access to smoothed values over a 10 | window or the global series average. 
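median and avg are computed over the most recent window_size updates, while
global_avg is total / count over every update ever seen.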
11 | """ 12 | 13 | def __init__(self, window_size=20): 14 | self.deque = deque(maxlen=window_size) 15 | self.series = [] 16 | self.total = 0.0 17 | self.count = 0 18 | 19 | def update(self, value): 20 | self.deque.append(value) 21 | self.series.append(value) 22 | self.count += 1 23 | self.total += value 24 | 25 | @property 26 | def median(self): 27 | d = torch.tensor(list(self.deque)) 28 | return d.median().item() 29 | 30 | @property 31 | def avg(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.mean().item() 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger(object): 41 | def __init__(self, delimiter="\t"): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | if attr in self.__dict__: 56 | return self.__dict__[attr] 57 | raise AttributeError("'{}' object has no attribute '{}'".format( 58 | type(self).__name__, attr)) 59 | 60 | def __str__(self): 61 | loss_str = [] 62 | for name, meter in self.meters.items(): 63 | loss_str.append( 64 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 65 | ) 66 | return self.delimiter.join(loss_str) 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import os 4 | 5 | 6 | def mkdir(path): 7 | try: 8 | os.makedirs(path) 9 | except OSError as e: 10 | if e.errno != errno.EEXIST: 11 | raise 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_serialization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | import logging 4 | 5 | import pdb 6 | import torch 7 | 8 | from maskrcnn_benchmark.utils.imports import import_file 9 | 10 | 11 | def align_and_update_state_dicts(model_state_dict, loaded_state_dict): 12 | """ 13 | Strategy: suppose that the models that we will create will have prefixes appended 14 | to each of its keys, for example due to an extra level of nesting that the original 15 | pre-trained weights from ImageNet won't contain. For example, model.state_dict() 16 | might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains 17 | res2.conv1.weight. We thus want to match both parameters together. 18 | For that, we look for each model weight, look among all loaded keys if there is one 19 | that is a suffix of the current weight name, and use it if that's the case. 20 | If multiple matches exist, take the one with longest size 21 | of the corresponding name. For example, for the same model as before, the pretrained 22 | weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, 23 | we want to match backbone[0].body.conv1.weight to conv1.weight, and 24 | backbone[0].body.res2.conv1.weight to res2.conv1.weight. 
25 | """ 26 | current_keys = sorted(list(model_state_dict.keys())) 27 | loaded_keys = sorted(list(loaded_state_dict.keys())) 28 | 29 | # get a matrix of string matches, where each (i, j) entry correspond to the size of the 30 | # loaded_key string, if it matches 31 | match_matrix = [ 32 | len(j) if i.endswith(j) else 0 for i in current_keys for j in loaded_keys 33 | ] 34 | match_matrix = torch.as_tensor(match_matrix).view( 35 | len(current_keys), len(loaded_keys) 36 | ) 37 | max_match_size, idxs = match_matrix.max(1) 38 | # remove indices that correspond to no-match 39 | idxs[max_match_size == 0] = -1 40 | 41 | # used for logging 42 | max_size = max([len(key) for key in current_keys]) if current_keys else 1 43 | max_size_loaded = max([len(key) for key in loaded_keys]) if loaded_keys else 1 44 | log_str_template = "{: <{}} loaded from {: <{}} of shape {}" 45 | logger = logging.getLogger(__name__) 46 | for idx_new, idx_old in enumerate(idxs.tolist()): 47 | if idx_old == -1: 48 | continue 49 | key = current_keys[idx_new] 50 | key_old = loaded_keys[idx_old] 51 | model_state_dict[key] = loaded_state_dict[key_old] 52 | logger.info( 53 | log_str_template.format( 54 | key, 55 | max_size, 56 | key_old, 57 | max_size_loaded, 58 | tuple(loaded_state_dict[key_old].shape), 59 | ) 60 | ) 61 | 62 | 63 | def strip_prefix_if_present(state_dict, prefix): 64 | keys = sorted(state_dict.keys()) 65 | if not all(key.startswith(prefix) for key in keys): 66 | return state_dict 67 | stripped_state_dict = OrderedDict() 68 | for key, value in state_dict.items(): 69 | stripped_state_dict[key.replace(prefix, "")] = value 70 | return stripped_state_dict 71 | 72 | 73 | def load_state_dict(model, loaded_state_dict): 74 | model_state_dict = model.state_dict() 75 | # if the state_dict comes from a model that was wrapped in a 76 | # DataParallel or DistributedDataParallel during serialization, 77 | # remove the "module" prefix before performing the matching 78 | loaded_state_dict = strip_prefix_if_present(loaded_state_dict, prefix="module.") 79 | align_and_update_state_dicts(model_state_dict, loaded_state_dict) 80 | 81 | # use strict loading 82 | model.load_state_dict(model_state_dict) 83 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | import sys 4 | 5 | from torch.utils.model_zoo import _download_url_to_file 6 | from torch.utils.model_zoo import urlparse 7 | from torch.utils.model_zoo import HASH_REGEX 8 | 9 | from maskrcnn_benchmark.utils.comm import is_main_process 10 | from maskrcnn_benchmark.utils.comm import synchronize 11 | 12 | 13 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py 14 | # but with a few improvements and modifications 15 | def cache_url(url, model_dir=None, progress=True): 16 | r"""Loads the Torch serialized object at the given URL. 17 | If the object is already present in `model_dir`, it's deserialized and 18 | returned. The filename part of the URL should follow the naming convention 19 | ``filename-.ext`` where ```` is the first eight or more 20 | digits of the SHA256 hash of the contents of the file. The hash is used to 21 | ensure unique names and to verify the contents of the file. 22 | The default value of `model_dir` is ``$TORCH_HOME/models`` where 23 | ``$TORCH_HOME`` defaults to ``~/.torch``. 
The default directory can be 24 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 25 | Args: 26 | url (string): URL of the object to download 27 | model_dir (string, optional): directory in which to save the object 28 | progress (bool, optional): whether or not to display a progress bar to stderr 29 | Example: 30 | >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') 31 | """ 32 | if model_dir is None: 33 | torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch')) 34 | model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models')) 35 | if not os.path.exists(model_dir): 36 | os.makedirs(model_dir) 37 | parts = urlparse(url) 38 | filename = os.path.basename(parts.path) 39 | if filename == "model_final.pkl": 40 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename 41 | # so make the full path the filename by replacing / with _ 42 | filename = parts.path.replace("/", "_") 43 | cached_file = os.path.join(model_dir, filename) 44 | if not os.path.exists(cached_file) and is_main_process(): 45 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 46 | hash_prefix = HASH_REGEX.search(filename) 47 | if hash_prefix is not None: 48 | hash_prefix = hash_prefix.group(1) 49 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, 50 | # which matches the hash PyTorch uses. So we skip the hash matching 51 | # if the hash_prefix is less than 6 characters 52 | if len(hash_prefix) < 6: 53 | hash_prefix = None 54 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 55 | synchronize() 56 | return cached_file 57 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | def _register_generic(module_dict, module_name, module): 5 | assert module_name not in module_dict 6 | module_dict[module_name] = module 7 | 8 | 9 | class Registry(dict): 10 | ''' 11 | A helper class for managing registered modules; it extends a dictionary 12 | and provides a register function. 13 | 14 | E.g. creating a registry: 15 | some_registry = Registry({"default": default_module}) 16 | 17 | There are two ways of registering new modules: 18 | 1): the normal way is just calling the register function: 19 | def foo(): 20 | ... 21 | some_registry.register("foo_module", foo) 22 | 2): used as a decorator when declaring the module: 23 | @some_registry.register("foo_module") 24 | @some_registry.register("foo_module_nickname") 25 | def foo(): 26 | ... 27 | 28 | Accessing a module is just like using a dictionary, e.g.: 29 | f = some_registry["foo_module"] 30 | ''' 31 | def __init__(self, *args, **kwargs): 32 | super(Registry, self).__init__(*args, **kwargs) 33 | 34 | def register(self, module_name, module=None): 35 | # used as function call 36 | if module is not None: 37 | _register_generic(self, module_name, module) 38 | return 39 | 40 | # used as decorator 41 | def register_fn(fn): 42 | _register_generic(self, module_name, fn) 43 | return fn 44 | 45 | return register_fn 46 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if torch.cuda.is_available() and CUDA_HOME is not None: 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "maskrcnn_benchmark._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="maskrcnn_benchmark", 61 | version="0.1", 62 | author="fmassa", 63 | url="https://github.com/facebookresearch/maskrcnn-benchmark", 64 | description="object detection in pytorch", 65 | packages=find_packages(exclude=("configs", "tests",)), 66 | # install_requires=requirements, 67 | ext_modules=get_extensions(), 68 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 69 | ) 70 | --------------------------------------------------------------------------------
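As a closing note, a quick way to verify the extension compiled by this setup script is to exercise one of the wrapped ops; a minimal smoke test, assuming the package was built in place (for example with `python setup.py build_ext develop`):

import torch
from maskrcnn_benchmark.layers import nms  # thin wrapper over the compiled maskrcnn_benchmark._C

boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                      [1.0, 1.0, 11.0, 11.0],    # heavily overlaps the first box
                      [50.0, 50.0, 60.0, 60.0]])
scores = torch.tensor([0.9, 0.8, 0.7])

keep = nms(boxes, scores, 0.5)  # same call pattern as boxlist_nms in boxlist_ops.py
print(keep)                     # expect indices 0 and 2; box 1 is suppressed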