├── ABSTRACTIONS.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── INSTALL.md
├── MODEL_ZOO.md
├── README.md
├── TROUBLESHOOTING.md
├── configs
│   ├── cityscapes
│   │   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
│   │   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle_order.yaml
│   │   ├── panoptic_8gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
│   │   ├── panoptic_8gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle_dc.yaml
│   │   ├── panoptic_8gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle_dc_order.yaml
│   │   └── panoptic_e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
│   ├── panoptic_2gpu_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.yaml
│   ├── panoptic_2gpu_mask_rcnn_R_50_FPN_1x_test_dev.yaml
│   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x.yaml
│   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_fusion.yaml
│   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_order.yaml
│   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_order_nodense.yaml
│   ├── panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.yaml
│   ├── panoptic_4gpu_mask_rcnn_R_101_FPN_1x_test_dev.yaml
│   ├── panoptic_4gpu_mask_rcnn_R_50_FPN_1x_test_dev.yaml
│   ├── panoptic_e2e_mask_rcnn_R_101_FPN_1x_order.yaml
│   ├── panoptic_e2e_mask_rcnn_R_50_FPN_1x.yaml
│   ├── panoptic_e2e_mask_rcnn_R_50_FPN_1x_fusion.yaml
│   ├── panoptic_e2e_mask_rcnn_R_50_FPN_1x_order.yaml
│   ├── panoptic_e2e_mask_rcnn_R_50_FPN_1x_order_test.yaml
│   ├── panoptic_e2e_mask_rcnn_R_50_FPN_1x_order_val.yaml
│   ├── panoptic_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.py
│   └── panoptic_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.yaml
├── maskrcnn_benchmark
│   ├── __init__.py
│   ├── config
│   │   ├── __init__.py
│   │   ├── defaults.py
│   │   └── paths_catalog.py
│   ├── csrc
│   │   ├── ROIAlign.h
│   │   ├── ROIPool.h
│   │   ├── cpu
│   │   │   ├── ROIAlign_cpu.cpp
│   │   │   ├── nms_cpu.cpp
│   │   │   └── vision.h
│   │   ├── cuda
│   │   │   ├── ROIAlign_cuda.cu
│   │   │   ├── ROIPool_cuda.cu
│   │   │   ├── deform_conv_cuda.cu
│   │   │   ├── deform_conv_kernel_cuda.cu
│   │   │   ├── nms.cu
│   │   │   └── vision.h
│   │   ├── deform_conv.h
│   │   ├── nms.h
│   │   └── vision.cpp
│   ├── data
│   │   ├── __init__.py
│   │   ├── build.py
│   │   ├── collate_batch.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── coco.py
│   │   │   ├── concat_dataset.py
│   │   │   ├── evaluation
│   │   │   │   ├── __init__.py
│   │   │   │   ├── coco
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── coco_eval.py
│   │   │   │   └── voc
│   │   │   │       ├── __init__.py
│   │   │   │       └── voc_eval.py
│   │   │   ├── list_dataset.py
│   │   │   ├── panoptic_cityscapes.py
│   │   │   ├── panoptic_coco.py
│   │   │   └── voc.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   ├── distributed.py
│   │   │   ├── grouped_batch_sampler.py
│   │   │   └── iteration_based_batch_sampler.py
│   │   └── transforms
│   │       ├── __init__.py
│   │       ├── build.py
│   │       └── transforms.py
│   ├── engine
│   │   ├── __init__.py
│   │   ├── inference.py
│   │   └── trainer.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── _utils.py
│   │   ├── batch_norm.py
│   │   ├── dcn
│   │   │   ├── __init__.py
│   │   │   ├── deform_conv_func.py
│   │   │   └── deform_conv_module.py
│   │   ├── misc.py
│   │   ├── nms.py
│   │   ├── roi_align.py
│   │   ├── roi_pool.py
│   │   └── smooth_l1_loss.py
│   ├── modeling
│   │   ├── __init__.py
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── backbone.py
│   │   │   ├── fpn.py
│   │   │   └── resnet.py
│   │   ├── balanced_positive_negative_sampler.py
│   │   ├── box_coder.py
│   │   ├── detector
│   │   │   ├── __init__.py
│   │   │   ├── detectors.py
│   │   │   ├── generalized_rcnn.py
│   │   │   └── panoptic_fpn.py
│   │   ├── make_layers.py
│   │   ├── matcher.py
│   │   ├── poolers.py
│   │   ├── registry.py
│   │   ├── roi_heads
│   │   │   ├── __init__.py
│   │   │   ├── box_head
│   │   │   │   ├── __init__.py
│   │   │   │   ├── box_head.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── loss.py
│   │   │   │   ├── roi_box_feature_extractors.py
│   │   │   │   └── roi_box_predictors.py
│   │   │   ├── mask_head
│   │   │   │   ├── __init__.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── loss.py
│   │   │   │   ├── mask_head.py
│   │   │   │   ├── roi_mask_feature_extractors.py
│   │   │   │   └── roi_mask_predictors.py
│   │   │   ├── order_head
│   │   │   │   ├── __init__.py
│   │   │   │   ├── loss.py
│   │   │   │   ├── order_head.py
│   │   │   │   ├── roi_order_feature_extractors.py
│   │   │   │   └── roi_order_predictors.py
│   │   │   └── roi_heads.py
│   │   ├── rpn
│   │   │   ├── __init__.py
│   │   │   ├── anchor_generator.py
│   │   │   ├── inference.py
│   │   │   ├── loss.py
│   │   │   └── rpn.py
│   │   ├── segmenter
│   │   │   ├── __init__.py
│   │   │   ├── segmenters.py
│   │   │   └── semantic_fpn.py
│   │   ├── semantic
│   │   │   ├── __init__.py
│   │   │   ├── loss.py
│   │   │   └── segmentation.py
│   │   └── utils.py
│   ├── solver
│   │   ├── __init__.py
│   │   ├── build.py
│   │   └── lr_scheduler.py
│   ├── structures
│   │   ├── __init__.py
│   │   ├── bounding_box.py
│   │   ├── boxlist_ops.py
│   │   ├── image_list.py
│   │   ├── panoptic.py
│   │   └── segmentation_mask.py
│   └── utils
│       ├── README.md
│       ├── __init__.py
│       ├── c2_model_loading.py
│       ├── checkpoint.py
│       ├── collect_env.py
│       ├── comm.py
│       ├── cv2_util.py
│       ├── env.py
│       ├── experiment.py
│       ├── imports.py
│       ├── logger.py
│       ├── metric_logger.py
│       ├── miscellaneous.py
│       ├── model_serialization.py
│       ├── model_zoo.py
│       └── registry.py
├── setup.py
└── tools
    ├── test_net.py
    └── train_net.py

/ABSTRACTIONS.md:
--------------------------------------------------------------------------------
## Abstractions
The main abstractions introduced by `maskrcnn_benchmark` that are useful to
have in mind are the following:

### ImageList
In PyTorch, the first dimension of the input to the network generally represents
the batch dimension, and thus all elements of the same batch have the same
height / width.
In order to support images with different sizes and aspect ratios in the same
batch, we created the `ImageList` class, which holds internally a batch of
images (of possibly different sizes). The images are padded with zeros such that
they have the same final size and batched over the first dimension. The original
sizes of the images before padding are stored in the `image_sizes` attribute,
and the batched tensor in `tensors`.
We provide a convenience function `to_image_list` that accepts a few different
input types, including a list of tensors, and returns an `ImageList` object.

```python
import torch

from maskrcnn_benchmark.structures.image_list import to_image_list

images = [torch.rand(3, 100, 200), torch.rand(3, 150, 170)]
batched_images = to_image_list(images)

# it is also possible to make the final batched image be a multiple of a number
batched_images_32 = to_image_list(images, size_divisible=32)
```

### BoxList
The `BoxList` class holds a set of bounding boxes (represented as an `Nx4` tensor) for
a specific image, as well as the size of the image as a `(width, height)` tuple.
It also contains a set of methods that allow performing geometric
transformations on the bounding boxes (such as cropping, scaling and flipping).
The class accepts bounding boxes in two different input formats:
- `xyxy`, where each box is encoded by its `x1`, `y1`, `x2` and `y2` coordinates, and
- `xywh`, where each box is encoded by its `x1`, `y1`, `w` and `h`.

Additionally, each `BoxList` instance can also hold arbitrary additional information
for each bounding box, such as labels, visibility, probability scores etc.
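
A box can also be re-encoded between the two formats after construction; a minimal
sketch (`convert` is the method the upstream `maskrcnn-benchmark` `BoxList` exposes
for this):

```python
import torch

from maskrcnn_benchmark.structures.bounding_box import BoxList

# a single box given by its corners (x1, y1, x2, y2)
bbox = BoxList(torch.tensor([[10., 10., 50., 70.]]), image_size=(100, 200), mode='xyxy')

# the same box re-encoded as (x1, y1, w, h); extra fields are carried over
bbox_xywh = bbox.convert('xywh')
```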
Here is an example on how to create a `BoxList` from a list of coordinates:
```python
import torch

from maskrcnn_benchmark.structures.bounding_box import BoxList, FLIP_LEFT_RIGHT

width = 100
height = 200
boxes = [
  [0, 10, 50, 50],
  [50, 20, 90, 60],
  [10, 10, 50, 50]
]
# create a BoxList with 3 boxes
bbox = BoxList(boxes, image_size=(width, height), mode='xyxy')

# perform some box transformations, has similar API as PIL.Image
bbox_scaled = bbox.resize((width * 2, height * 3))
bbox_flipped = bbox.transpose(FLIP_LEFT_RIGHT)

# add labels for each bbox
labels = torch.tensor([0, 10, 1])
bbox.add_field('labels', labels)

# bbox also supports a few operations, like indexing
# here, selects boxes 0 and 2
bbox_subset = bbox[[0, 2]]
```

--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Code of Conduct

Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
Please read the [full text](https://code.fb.com/codeofconduct/)
so that you can understand what actions will and will not be tolerated.

--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing to Mask-RCNN Benchmark
We want to make contributing to this project as easy and transparent as
possible.

## Our Development Process
Minor changes and improvements will be released on an ongoing basis. Larger changes (e.g., changesets implementing a new paper) will be released on a more periodic basis.

## Pull Requests
We actively welcome your pull requests.

1. Fork the repo and create your branch from `master`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").

## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.

Complete your CLA here: <https://code.facebook.com/cla>

## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.

Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.

## Coding Style
* 4 spaces for indentation rather than tabs
* 80 character line length
* PEP8 formatting following [Black](https://black.readthedocs.io/en/stable/)

## License
By contributing to Mask-RCNN Benchmark, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.

--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
## Installation

### Requirements:
- PyTorch 1.0 from a nightly release. Installation instructions can be found in https://pytorch.org/get-started/locally/
- torchvision from master
- cocoapi
- yacs
- matplotlib
- GCC >= 4.9
- (optional) OpenCV for the webcam demo


### Option 1: Step-by-step installation

```bash
# first, make sure that your conda is setup properly with the right environment
# for that, check that `which conda`, `which pip` and `which python` points to the
# right path. From a clean conda env, this is what you need to do

conda create --name maskrcnn_benchmark
source activate maskrcnn_benchmark

# this installs the right pip and dependencies for the fresh python
conda install ipython

# maskrcnn_benchmark and coco api dependencies
pip install ninja yacs cython matplotlib

# follow PyTorch installation in https://pytorch.org/get-started/locally/
# we give the instructions for CUDA 9.0
conda install pytorch-nightly -c pytorch

# install torchvision
cd ~/github
git clone https://github.com/pytorch/vision.git
cd vision
python setup.py install

# install pycocotools
cd ~/github
git clone https://github.com/cocodataset/cocoapi.git
cd cocoapi/PythonAPI
python setup.py build_ext install

# install PyTorch Detection
cd ~/github
git clone https://github.com/facebookresearch/maskrcnn-benchmark.git
cd maskrcnn-benchmark
# the following will install the lib with
# symbolic links, so that you can modify
# the files if you want and won't need to
# re-build it
python setup.py build develop

# or if you are on macOS
# MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop
```

### Option 2: Docker Image (Requires CUDA, Linux only)

Build image with defaults (`CUDA=9.0`, `CUDNN=7`):

    nvidia-docker build -t maskrcnn-benchmark docker/

Build image with other CUDA and CUDNN versions:

    nvidia-docker build -t maskrcnn-benchmark --build-arg CUDA=9.2 --build-arg CUDNN=7 docker/

Build and run image with built-in jupyter notebook (note that the password is used to log in jupyter notebook):

    nvidia-docker build -t maskrcnn-benchmark-jupyter docker/docker-jupyter/
    nvidia-docker run -td -p 8888:8888 -e PASSWORD=<password> -v <host-dir>:<container-dir> maskrcnn-benchmark-jupyter
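
Once the build finishes, a quick sanity check (a sketch; it assumes the
`python setup.py build develop` step above succeeded) is to import the compiled
extension that the C++/CUDA sources under `csrc` are built into:

```python
import torch

# the custom ops (NMS, ROIAlign, ...) are compiled into maskrcnn_benchmark._C;
# an ImportError here means the build step did not complete
from maskrcnn_benchmark import _C  # noqa: F401

print(torch.__version__)          # should report a 1.0 nightly
print(torch.cuda.is_available())  # True if a CUDA device is visible to PyTorch
```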
--------------------------------------------------------------------------------
/MODEL_ZOO.md:
--------------------------------------------------------------------------------
## Model Zoo and Baselines

### Hardware
- 8 NVIDIA V100 GPUs

### Software
- PyTorch version: 1.0.0a0+dd2c487
- CUDA 9.2
- CUDNN 7.1
- NCCL 2.2.13-1

### End-to-end Faster and Mask R-CNN baselines

All the baselines were trained using the exact same experimental setup as in Detectron.
We initialize the detection models with ImageNet weights from Caffe2, the same as used by Detectron.

The pre-trained models are available via the links in the model id column.

backbone | type | lr sched | im / gpu | train mem (GB) | train time (s/iter) | total train time (hr) | inference time (s/im) | box AP | mask AP | model id
-- | -- | -- | -- | -- | -- | -- | -- | -- | -- | --
R-50-C4 | Fast | 1x | 1 | 5.8 | 0.4036 | 20.2 | 0.17130 | 34.8 | - | [6358800](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_50_C4_1x.pth)
R-50-FPN | Fast | 1x | 2 | 4.4 | 0.3530 | 8.8 | 0.12580 | 36.8 | - | [6358793](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_50_FPN_1x.pth)
R-101-FPN | Fast | 1x | 2 | 7.1 | 0.4591 | 11.5 | 0.143149 | 39.1 | - | [6358804](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_101_FPN_1x.pth)
X-101-32x8d-FPN | Fast | 1x | 1 | 7.6 | 0.7007 | 35.0 | 0.209965 | 41.2 | - | [6358717](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_X_101_32x8d_FPN_1x.pth)
R-50-C4 | Mask | 1x | 1 | 5.8 | 0.4520 | 22.6 | 0.17796 + 0.028 | 35.6 | 31.5 | [6358801](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_C4_1x.pth)
R-50-FPN | Mask | 1x | 2 | 5.2 | 0.4536 | 11.3 | 0.12966 + 0.034 | 37.8 | 34.2 | [6358792](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_FPN_1x.pth)
R-101-FPN | Mask | 1x | 2 | 7.9 | 0.5665 | 14.2 | 0.15384 + 0.034 | 40.1 | 36.1 | [6358805](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_101_FPN_1x.pth)
X-101-32x8d-FPN | Mask | 1x | 1 | 7.8 | 0.7562 | 37.8 | 0.21739 + 0.034 | 42.2 | 37.8 | [6358718](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_X_101_32x8d_FPN_1x.pth)
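
As a sketch of how one of these checkpoints can be consumed (the config path is
illustrative — this repository ships panoptic configs under `configs/` — while
`build_detection_model` and `DetectronCheckpointer` are the upstream utilities
for building a model and loading such weights):

```python
from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.modeling.detector import build_detection_model
from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer

# hypothetical config matching the R-50-FPN Mask R-CNN row of the table above
cfg.merge_from_file("configs/e2e_mask_rcnn_R_50_FPN_1x.yaml")
model = build_detection_model(cfg)

# DetectronCheckpointer resolves catalog:// and http(s) paths, caching downloads
checkpointer = DetectronCheckpointer(cfg, model, save_dir="")
checkpointer.load("https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_FPN_1x.pth")
model.eval()
```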

## Comparison with Detectron and mmdetection

In the following section, we compare our implementation with [Detectron](https://github.com/facebookresearch/Detectron)
and [mmdetection](https://github.com/open-mmlab/mmdetection).
The same remarks from [mmdetection](https://github.com/open-mmlab/mmdetection/blob/master/MODEL_ZOO.md#training-speed)
about different hardware apply here.

### Training speed

The numbers here are in seconds / iteration. The lower, the better.

type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 0.566 | - | 0.4036
Faster R-CNN R-50 FPN | 0.544 | 0.554 | 0.3530
Faster R-CNN R-101 FPN | 0.647 | - | 0.4591
Faster R-CNN X-101-32x8d FPN | 0.799 | - | 0.7007
Mask R-CNN R-50 C4 | 0.620 | - | 0.4520
Mask R-CNN R-50 FPN | 0.889 | 0.690 | 0.4536
Mask R-CNN R-101 FPN | 1.008 | - | 0.5665
Mask R-CNN X-101-32x8d FPN | 0.961 | - | 0.7562

### Training memory

The numbers here are in GB. The lower, the better.

type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 6.3 | - | 5.8
Faster R-CNN R-50 FPN | 7.2 | 4.9 | 4.4
Faster R-CNN R-101 FPN | 8.9 | - | 7.1
Faster R-CNN X-101-32x8d FPN | 7.0 | - | 7.6
Mask R-CNN R-50 C4 | 6.6 | - | 5.8
Mask R-CNN R-50 FPN | 8.6 | 5.9 | 5.2
Mask R-CNN R-101 FPN | 10.2 | - | 7.9
Mask R-CNN X-101-32x8d FPN | 7.7 | - | 7.8

### Accuracy

The numbers are COCO box AP (& mask AP, where applicable). The higher, the better.

type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 34.8 | - | 34.8
Faster R-CNN R-50 FPN | 36.7 | 36.7 | 36.8
Faster R-CNN R-101 FPN | 39.4 | - | 39.1
Faster R-CNN X-101-32x8d FPN | 41.3 | - | 41.2
Mask R-CNN R-50 C4 | 35.8 & 31.4 | - | 35.6 & 31.5
Mask R-CNN R-50 FPN | 37.7 & 33.9 | 37.5 & 34.4 | 37.8 & 34.2
Mask R-CNN R-101 FPN | 40.0 & 35.9 | - | 40.1 & 36.1
Mask R-CNN X-101-32x8d FPN | 42.1 & 37.3 | - | 42.2 & 37.8

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Learning Instance Occlusion

This is the code for the CVPR 2020 [paper](http://openaccess.thecvf.com/content_CVPR_2020/papers/Lazarow_Learning_Instance_Occlusion_for_Panoptic_Segmentation_CVPR_2020_paper.pdf) "Learning Instance Occlusion for Panoptic Segmentation".

This project is based off of the excellent [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark). We extend it to:

1. Support [Panoptic Segmentation/Panoptic FPN](https://arxiv.org/abs/1901.02446)
2. Support learning an instance-wise relationship to determine occlusion
3. Integrate this into the [existing greedy merging heuristic](https://arxiv.org/abs/1801.00868) (sketched below)
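
A schematic sketch of how an occlusion query slots into that heuristic
(illustrative only: the function, parameter names and thresholds here are made
up, and `occlusion_score` stands in for the learned occlusion head; the actual
implementation lives in the panoptic fusion code of this repository):

```python
import numpy as np

def merge_instances(masks, scores, occlusion_score, overlap_threshold=0.5):
    """Greedily paste instance masks onto a canvas, highest confidence first.

    masks: (N, H, W) boolean array of predicted instance masks.
    scores: (N,) detection confidences.
    occlusion_score(i, j): assumed to return the probability that instance i
    occludes (lies on top of) instance j; in the paper this query is answered
    by the learned occlusion head.
    """
    order = np.argsort(scores)[::-1]
    canvas = np.full(masks.shape[1:], -1, dtype=np.int64)  # -1 means unclaimed
    for i in order:
        mask = masks[i].astype(bool)
        contested = mask & (canvas != -1)
        take = mask & (canvas == -1)
        # occlusion-aware step: pixels already claimed by instance j are taken
        # back when the occlusion head believes i actually sits on top of j
        for j in np.unique(canvas[contested]):
            if occlusion_score(i, j) > 0.5:
                take |= contested & (canvas == j)
        # baseline heuristic: skip instances whose visible fraction is too small
        if take.sum() < (1.0 - overlap_threshold) * mask.sum():
            continue
        canvas[take] = i
    return canvas
```

With `occlusion_score = lambda i, j: 0.0`, this reduces to the purely
confidence-ordered merging of the original heuristic; the learned head is what
lets a lower-scoring but occluding instance win contested pixels.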

If you make use of the ideas or code in this project, please consider citing:

```
@InProceedings{Lazarow_2020_CVPR,
  author = {Lazarow, Justin and Lee, Kwonjoon and Shi, Kunyu and Tu, Zhuowen},
  title = {Learning Instance Occlusion for Panoptic Segmentation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = {June},
  year = {2020}
}
```

--------------------------------------------------------------------------------
/TROUBLESHOOTING.md:
--------------------------------------------------------------------------------
# Troubleshooting

Here is a compilation of common issues that you might face
while compiling / running this code:

## Compilation errors when compiling the library
If you encounter build errors like the following:
```
/usr/include/c++/6/type_traits:1558:8: note: provided for ‘template struct std::is_convertible’
struct is_convertible
^~~~~~~~~~~~~~
/usr/include/c++/6/tuple:502:1: error: body of constexpr function ‘static constexpr bool std::_TC<, _Elements>::_NonNestedTuple() [with _SrcTuple = std::tuple&&; bool = true; _Elements = {at::Tensor, at::Tensor, at::Tensor, at::Tensor}]’ not a return-statement
}
^
error: command '/usr/local/cuda/bin/nvcc' failed with exit status 1
```
check your CUDA version and your `gcc` version.
```
nvcc --version
gcc --version
```
If you are using CUDA 9.0 and gcc 6.4.0, then refer to https://github.com/facebookresearch/maskrcnn-benchmark/issues/25,
which has a summary of the solution. Basically, CUDA 9.0 is not compatible with gcc 6.4.0.

## ImportError: No module named maskrcnn_benchmark.config when running webcam.py

This means that `maskrcnn-benchmark` has not been properly installed.
Refer to https://github.com/facebookresearch/maskrcnn-benchmark/issues/22 for a few possible issues.
Note that we now support Python 2 as well.

## Segmentation fault (core dumped) when running the library
This probably means that you have compiled the library using GCC < 4.9, which is ABI incompatible with PyTorch.
Indeed, during installation, you probably saw a message like
```
Your compiler (g++ 4.8) may be ABI-incompatible with PyTorch!
Please use a compiler that is ABI-compatible with GCC 4.9 and above.
See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html.

See https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
for instructions on how to install GCC 4.9 or higher.
```
Follow the instructions on https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
to install GCC 4.9 or higher, and try recompiling `maskrcnn-benchmark` again, after cleaning the
`build` folder with
```
rm -rf build
```

--------------------------------------------------------------------------------
/configs/cityscapes/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
1 | TASK: 2 | KIND: "Panoptic" 3 | INPUT: 4 | MIN_SIZE_TRAIN: (800, 816, 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008, 1024) 5 | MAX_SIZE_TRAIN: 2048 6 | MIN_SIZE_TEST: 1024 7 | MAX_SIZE_TEST: 2048 8 | MODEL: 9 | META_ARCHITECTURE: "PanopticFPN" 10 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 11 | BACKBONE: 12 | CONV_BODY: "R-50-FPN" 13 | OUT_CHANNELS: 256 14 | RPN: 15 | USE_FPN: True 16 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 17 | PRE_NMS_TOP_N_TRAIN: 2000 18 | PRE_NMS_TOP_N_TEST: 1000 19 | POST_NMS_TOP_N_TEST: 1000 20 | FPN_POST_NMS_TOP_N_TEST: 1000 21 | USE_SEMANTIC_FEATURES: False 22 | RPN_HEAD: "SingleConvRPNHead" 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | NUM_CLASSES: 9 32 | ROI_MASK_HEAD: 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 35 | PREDICTOR: "MaskRCNNC4Predictor" 36 | POOLER_RESOLUTION: 14 37 | POOLER_SAMPLING_RATIO: 2 38 | RESOLUTION: 28 39 | SHARE_BOX_FEATURE_EXTRACTOR: False 40 | ROI_ORDER_HEAD: 41 | BATCH_SIZE_PER_IMAGE: 128 42 | SHARE_MASK_FEATURE_EXTRACTOR: False 43 | ENSURE_CONSISTENCY: True 44 | WEIGHT: 1.0 45 | SEMANTIC: 46 | USE_FPN: True 47 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 48 | UPSAMPLE_MODULE: "One3x3ReLU" 49 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 50 | COLLAPSE_THING_ONTOLOGY: True 51 | UPSAMPLE_METHOD: "bilinear" 52 | NUM_CLASSES: 11 53 | PANOPTIC: 54 | INSTANCE_WEIGHT: 1.0 55 | SEMANTIC_WEIGHT: 1.0 56 | COMPUTE_CC_RESULTS: False 57 | COMPUTE_PRE_RESULTS: False 58 | MASK_ON: True 59 | ORDER_ON: False 60 | WEIGHT: "/local/experiments/panoptic/cityscapes/05162019_155406/model_0020000.pth" # "/local/experiments/panoptic/cityscapes/06162019_162039/model_final.pth" #"/local/experiments/panoptic/cityscapes/05162019_155406/model_0027500.pth" #"/local/experiments/panoptic/cityscapes/05162019_155406/model_0017500.pth" #"/local/experiments/03222019_005905_model_0020000.pth" #"/local/experiments/03222019_005905_model_0017500.pth" #"/local/experiments/panoptic/cityscapes/03182019_141908/model_final.pth" #"/local/experiments/03212019_225953_model_final.pth" #"/local/experiments/03212019_004629_model_0012500.pth" #"/local/experiments/panoptic/cityscapes/03212019_190229/model_final.pth" #"/local/experiments/panoptic/cityscapes/03182019_141908/model_final.pth" #"/local/experiments/03212019_004629_model_0010000.pth" #panoptic/cityscapes/03212019_161835/model_final.pth" # #"/local/experiments/panoptic/cityscapes/03212019_141521/model_final.pth" 61 | #"/local/experiments/panoptic/cityscapes/03182019_141908/model_final.pth" ##"/local/experiments/panoptic/cityscapes/03202019_183257/model_final.pth"#"/local/experiments/panoptic/cityscapes/03212019_000352/model_final.pth" # 62 | #"/local/experiments/panoptic/cityscapes/03202019_135406/model_final.pth" ## 63 | DATASETS: 64 | TRAIN: 
("panoptic_cityscapes_fine_instanceonly_seg_train_cocostyle",) 65 | TEST: ("panoptic_cityscapes_fine_instanceonly_seg_tiny_cocostyle",) 66 | DATALOADER: 67 | SIZE_DIVISIBILITY: 32 68 | SOLVER: 69 | IMS_PER_BATCH: 4 70 | BASE_LR: 0.02 71 | WEIGHT_DECAY: 0.0001 72 | STEPS: (9000,) 73 | MAX_ITER: 12000 74 | RESUME_ITER: 8750 75 | OVERRIDE_OPTIMIZER: True 76 | TEST: 77 | IMS_PER_BATCH: 2 78 | PREDICTION_PATHS: [] #"/local/experiments/panoptic/cityscapes/06162019_162039/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"]#["/local/experiments/panoptic/cityscapes/03212019_190229/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] #["/local/experiments/panoptic/cityscapes/03212019_161835/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] #["/local/experiments/panoptic/cityscapes/03212019_141521/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] #["/local/experiments/panoptic/cityscapes/03212019_000352/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] 79 | TEMPORARY_DIR: "/local/tmp" 80 | NAME: "cityscapes" 81 | DESCRIPTION: "collapsed, fine-tuning order head" 82 | -------------------------------------------------------------------------------- /configs/cityscapes/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle_order.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | INPUT: 4 | MIN_SIZE_TRAIN: (800, 816, 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008, 1024) 5 | MAX_SIZE_TRAIN: 2048 6 | MIN_SIZE_TEST: 1024 7 | MAX_SIZE_TEST: 2048 8 | MODEL: 9 | META_ARCHITECTURE: "PanopticFPN" 10 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 11 | BACKBONE: 12 | CONV_BODY: "R-50-FPN" 13 | OUT_CHANNELS: 256 14 | RPN: 15 | USE_FPN: True 16 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 17 | PRE_NMS_TOP_N_TRAIN: 2000 18 | PRE_NMS_TOP_N_TEST: 1000 19 | POST_NMS_TOP_N_TEST: 1000 20 | FPN_POST_NMS_TOP_N_TEST: 1000 21 | USE_SEMANTIC_FEATURES: False 22 | RPN_HEAD: "SingleConvRPNHead" 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | NUM_CLASSES: 9 32 | ROI_MASK_HEAD: 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 35 | PREDICTOR: "MaskRCNNC4Predictor" 36 | POOLER_RESOLUTION: 14 37 | POOLER_SAMPLING_RATIO: 2 38 | RESOLUTION: 28 39 | SHARE_BOX_FEATURE_EXTRACTOR: False 40 | ROI_ORDER_HEAD: 41 | OVERLAP_THRESHOLD: 0.05 42 | BATCH_SIZE_PER_IMAGE: 512 43 | SHARE_MASK_FEATURE_EXTRACTOR: False 44 | ENSURE_CONSISTENCY: False 45 | WEIGHT: 1.0 46 | ONLY_TRAIN: True 47 | SEMANTIC: 48 | USE_FPN: True 49 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 50 | UPSAMPLE_MODULE: "One3x3ReLU" 51 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 52 | COLLAPSE_THING_ONTOLOGY: True 53 | UPSAMPLE_METHOD: "bilinear" 54 | NUM_CLASSES: 11 55 | PANOPTIC: 56 | INSTANCE_WEIGHT: 1.0 57 | SEMANTIC_WEIGHT: 1.0 58 | COMPUTE_CC_RESULTS: False 59 | COMPUTE_PRE_RESULTS: True 60 | MASK_ON: True 61 | ORDER_ON: True 62 | WEIGHT: "/local/experiments/panoptic/cityscapes/06172019_233221/model_final.pth" #"/local/experiments/panoptic/cityscapes/05162019_155406/model_final.pth" #"/local/experiments/panoptic/cityscapes/06172019_190920/model_final.pth" 
#"/local/experiments/panoptic/cityscapes/05162019_155406/model_final.pth" #"/local/experiments/panoptic/cityscapes/05242019_172548/model_final.pth" #"/local/experiments/panoptic/cityscapes/05162019_155406/model_0032500.pth" # "/local/experiments/panoptic/cityscapes/05242019_172548/model_final.pth" # "/local/experiments/panoptic/cityscapes/06032019_154759/model_final.pth" # "/local/experiments/panoptic/cityscapes/06112019_225624/model_final.pth" 63 | DATASETS: 64 | TRAIN: ("panoptic_cityscapes_fine_instanceonly_seg_train_cocostyle_overlap005",) 65 | TEST: ("panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle",) #("panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle",) #_overlap005 66 | DATALOADER: 67 | SIZE_DIVISIBILITY: 32 68 | SOLVER: 69 | IMS_PER_BATCH: 8 70 | # 0.02 at 16 GPUs. 71 | BASE_LR: 0.02 72 | WEIGHT_DECAY: 0.0001 73 | STEPS: (14000,) # this is wrong. 74 | MAX_ITER: 19000 75 | # RESUME_ITER: 16250 76 | TEST: 77 | IMS_PER_BATCH: 8 78 | INTRACLASS_OCCLUSION: True 79 | #ORDER_ONLY: True 80 | PREDICTION_PATHS: [] #["/local/experiments/panoptic/cityscapes/06042019_131251/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] 81 | TEMPORARY_DIR: "/tmp" 82 | NAME: "cityscapes" 83 | DESCRIPTION: "training order head, training new features, fixed solver I hope" 84 | -------------------------------------------------------------------------------- /configs/cityscapes/panoptic_8gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | INPUT: 4 | MIN_SIZE_TRAIN: (800, 816, 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008, 1024) 5 | MAX_SIZE_TRAIN: 2048 6 | MIN_SIZE_TEST: 1024 7 | MAX_SIZE_TEST: 2048 8 | MODEL: 9 | META_ARCHITECTURE: "PanopticFPN" 10 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 11 | BACKBONE: 12 | CONV_BODY: "R-50-FPN" 13 | OUT_CHANNELS: 256 14 | RPN: 15 | USE_FPN: True 16 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 17 | PRE_NMS_TOP_N_TRAIN: 2000 18 | PRE_NMS_TOP_N_TEST: 1000 19 | POST_NMS_TOP_N_TEST: 1000 20 | FPN_POST_NMS_TOP_N_TEST: 1000 21 | USE_SEMANTIC_FEATURES: False 22 | RPN_HEAD: "SingleConvRPNHead" 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | NUM_CLASSES: 9 32 | ROI_MASK_HEAD: 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 35 | PREDICTOR: "MaskRCNNC4Predictor" 36 | POOLER_RESOLUTION: 14 37 | POOLER_SAMPLING_RATIO: 2 38 | RESOLUTION: 28 39 | SHARE_BOX_FEATURE_EXTRACTOR: False 40 | ROI_ORDER_HEAD: 41 | BATCH_SIZE_PER_IMAGE: 128 42 | SHARE_MASK_FEATURE_EXTRACTOR: False 43 | ENSURE_CONSISTENCY: True 44 | WEIGHT: 1.0 45 | SEMANTIC: 46 | USE_FPN: True 47 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 48 | UPSAMPLE_MODULE: "One3x3ReLU" 49 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 50 | COLLAPSE_THING_ONTOLOGY: True 51 | UPSAMPLE_METHOD: "bilinear" 52 | NUM_CLASSES: 11 53 | PANOPTIC: 54 | INSTANCE_WEIGHT: 1.0 55 | SEMANTIC_WEIGHT: 1.0 56 | COMPUTE_CC_RESULTS: False 57 | COMPUTE_PRE_RESULTS: True 58 | FUSION: 59 | CONFIDENCE_THRESHOLD: 0.6 60 | OVERLAP_THRESHOLD: 0.5 61 | STUFF_MINIMUM_AREA: 2048 62 | MASK_ON: True 63 | WEIGHT: "/local/experiments/panoptic/cityscapes/05162019_155406/model_final.pth" 64 | DATASETS: 65 | TRAIN: 
("panoptic_cityscapes_fine_instanceonly_seg_train_cocostyle",) 66 | TEST: ("panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle",) 67 | DATALOADER: 68 | SIZE_DIVISIBILITY: 32 69 | SOLVER: 70 | IMS_PER_BATCH: 8 71 | BASE_LR: 0.01 72 | WEIGHT_DECAY: 0.0001 73 | STEPS: (24000,) 74 | MAX_ITER: 36000 75 | TEST: 76 | IMS_PER_BATCH: 8 77 | PREDICTION_PATHS: ["/local/experiments/panoptic/cityscapes/05242019_133904/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] 78 | TEMPORARY_DIR: "/tmp" 79 | NAME: "cityscapes" 80 | DESCRIPTION: "renewing this effort" 81 | -------------------------------------------------------------------------------- /configs/cityscapes/panoptic_8gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle_dc.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | INPUT: 4 | MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) 5 | MAX_SIZE_TRAIN: 2048 6 | MIN_SIZE_TEST: 1024 7 | MAX_SIZE_TEST: 2048 8 | MODEL: 9 | META_ARCHITECTURE: "PanopticFPN" 10 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 11 | BACKBONE: 12 | CONV_BODY: "R-50-FPN" 13 | OUT_CHANNELS: 256 14 | RPN: 15 | USE_FPN: True 16 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 17 | PRE_NMS_TOP_N_TRAIN: 2000 18 | PRE_NMS_TOP_N_TEST: 1000 19 | POST_NMS_TOP_N_TEST: 1000 20 | FPN_POST_NMS_TOP_N_TEST: 1000 21 | USE_SEMANTIC_FEATURES: False 22 | RPN_HEAD: "SingleConvRPNHead" 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | NUM_CLASSES: 9 32 | ROI_MASK_HEAD: 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 35 | PREDICTOR: "MaskRCNNC4Predictor" 36 | POOLER_RESOLUTION: 14 37 | POOLER_SAMPLING_RATIO: 2 38 | RESOLUTION: 28 39 | SHARE_BOX_FEATURE_EXTRACTOR: False 40 | ROI_ORDER_HEAD: 41 | BATCH_SIZE_PER_IMAGE: 128 42 | SHARE_MASK_FEATURE_EXTRACTOR: False 43 | ENSURE_CONSISTENCY: True 44 | WEIGHT: 1.0 45 | SEMANTIC: 46 | USE_FPN: True 47 | USE_DC: True 48 | SEMANTIC_HEAD: "UpsampleEqualSizeConvConcat" 49 | UPSAMPLE_MODULE: "StraightDeconv" 50 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 51 | COLLAPSE_THING_ONTOLOGY: False 52 | UPSAMPLE_METHOD: "bilinear" 53 | NUM_CLASSES: 11 54 | PANOPTIC: 55 | INSTANCE_WEIGHT: 1.0 56 | SEMANTIC_WEIGHT: 1.0 57 | COMPUTE_CC_RESULTS: False 58 | COMPUTE_PRE_RESULTS: True 59 | FUSION: 60 | CONFIDENCE_THRESHOLD: 0.6 61 | OVERLAP_THRESHOLD: 0.5 62 | STUFF_MINIMUM_AREA: 2048 63 | MASK_ON: True 64 | WEIGHT: "/local/experiments/panoptic/cityscapes/06202019_120811/model_final.pth" 65 | DATASETS: 66 | TRAIN: ("panoptic_cityscapes_fine_instanceonly_seg_train_cocostyle",) 67 | TEST: ("panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle",) 68 | DATALOADER: 69 | SIZE_DIVISIBILITY: 32 70 | SOLVER: 71 | IMS_PER_BATCH: 8 72 | BASE_LR: 0.02 73 | WEIGHT_DECAY: 0.0001 74 | STEPS: (9000,) 75 | MAX_ITER: 12000 76 | TEST: 77 | IMS_PER_BATCH: 8 78 | PREDICTION_PATHS: [] 79 | TEMPORARY_DIR: "/local/tmp" 80 | NAME: "cityscapes" 81 | DESCRIPTION: "hopefully fixed the solver" 82 | -------------------------------------------------------------------------------- /configs/cityscapes/panoptic_8gpu_e2e_mask_rcnn_R_50_FPN_1x_cocostyle_dc_order.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | INPUT: 4 | MIN_SIZE_TRAIN: (800, 
832, 864, 896, 928, 960, 992, 1024) 5 | MAX_SIZE_TRAIN: 2048 6 | MIN_SIZE_TEST: 1024 7 | MAX_SIZE_TEST: 2048 8 | MODEL: 9 | META_ARCHITECTURE: "PanopticFPN" 10 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 11 | BACKBONE: 12 | CONV_BODY: "R-50-FPN" 13 | OUT_CHANNELS: 256 14 | RPN: 15 | USE_FPN: True 16 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 17 | PRE_NMS_TOP_N_TRAIN: 2000 18 | PRE_NMS_TOP_N_TEST: 1000 19 | POST_NMS_TOP_N_TEST: 1000 20 | FPN_POST_NMS_TOP_N_TEST: 1000 21 | USE_SEMANTIC_FEATURES: False 22 | RPN_HEAD: "SingleConvRPNHead" 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | NUM_CLASSES: 9 32 | ROI_MASK_HEAD: 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 35 | PREDICTOR: "MaskRCNNC4Predictor" 36 | POOLER_RESOLUTION: 14 37 | POOLER_SAMPLING_RATIO: 2 38 | RESOLUTION: 28 39 | SHARE_BOX_FEATURE_EXTRACTOR: False 40 | ROI_ORDER_HEAD: 41 | OVERLAP_THRESHOLD: 0.05 42 | BATCH_SIZE_PER_IMAGE: 512 43 | SHARE_MASK_FEATURE_EXTRACTOR: False 44 | ENSURE_CONSISTENCY: False 45 | WEIGHT: 1.0 46 | ONLY_TRAIN: True 47 | SEMANTIC: 48 | USE_FPN: True 49 | USE_DC: True 50 | SEMANTIC_HEAD: "UpsampleEqualSizeConvConcat" 51 | UPSAMPLE_MODULE: "StraightDeconv" 52 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 53 | COLLAPSE_THING_ONTOLOGY: False 54 | UPSAMPLE_METHOD: "bilinear" 55 | NUM_CLASSES: 11 56 | PANOPTIC: 57 | INSTANCE_WEIGHT: 1.0 58 | SEMANTIC_WEIGHT: 1.0 59 | COMPUTE_CC_RESULTS: False 60 | COMPUTE_PRE_RESULTS: True 61 | FUSION: 62 | CONFIDENCE_THRESHOLD: 0.6 63 | OVERLAP_THRESHOLD: 0.4 64 | STUFF_MINIMUM_AREA: 2048 65 | MASK_ON: True 66 | ORDER_ON: True 67 | WEIGHT: "/local/experiments/panoptic/cityscapes/06202019_120811/model_final.pth" # "/local/experiments/panoptic/cityscapes/06232019_173719/model_final.pth" # 68 | DATASETS: 69 | TRAIN: ("panoptic_cityscapes_fine_instanceonly_seg_train_cocostyle_overlap005",) 70 | TEST: ("panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle",) 71 | DATALOADER: 72 | SIZE_DIVISIBILITY: 32 73 | SOLVER: 74 | IMS_PER_BATCH: 8 75 | BASE_LR: 0.02 76 | WEIGHT_DECAY: 0.0001 77 | STEPS: (9000,) 78 | MAX_ITER: 12500 79 | TEST: 80 | IMS_PER_BATCH: 8 81 | INTRACLASS_OCCLUSION: True 82 | PREDICTION_PATHS: ["/local/experiments/panoptic/cityscapes/06232019_221633/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] #["/local/experiments/panoptic/cityscapes/06232019_230716/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] #["/local/experiments/panoptic/cityscapes/06242019_085723/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] #["/local/experiments/panoptic/cityscapes/06232019_230716/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] ["/local/experiments/panoptic/cityscapes/06242019_214756/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] 83 | TEMPORARY_DIR: "/local/tmp" 84 | NAME: "cityscapes" 85 | DESCRIPTION: "hopefully fixed the solver, trying at 005" 86 | -------------------------------------------------------------------------------- /configs/cityscapes/panoptic_e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: 
"PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | NUM_CLASSES: 9 27 | ROI_MASK_HEAD: 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | SEMANTIC: 36 | USE_FPN: True 37 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 38 | UPSAMPLE_MODULE: "One3x3ReLU" 39 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 40 | COLLAPSE_THING_ONTOLOGY: False 41 | UPSAMPLE_METHOD: "bilinear" 42 | NUM_CLASSES: 11 43 | PANOPTIC: 44 | INSTANCE_WEIGHT: 1.0 45 | SEMANTIC_WEIGHT: 1.0 46 | COMPUTE_CC_RESULTS: False 47 | COMPUTE_PRE_RESULTS: False 48 | MASK_ON: True 49 | DATASETS: 50 | TRAIN: ("panoptic_cityscapes_fine_instanceonly_seg_train_cocostyle",) 51 | TEST: ("panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle",) 52 | DATALOADER: 53 | SIZE_DIVISIBILITY: 32 54 | SOLVER: 55 | BASE_LR: 0.01 56 | WEIGHT_DECAY: 0.0001 57 | STEPS: (18000,) 58 | MAX_ITER: 24000 59 | TEST: 60 | PREDICTION_PATHS: [] #["/scratch/jlazarow/experiments/panoptic/cityscapes/03152019_164312/inference/panoptic_cityscapes_fine_instanceonly_seg_val_cocostyle/predictions.pth"] 61 | TEMPORARY_DIR: "/scratch/tmp" 62 | NAME: "cityscapes" 63 | DESCRIPTION: "dense ontology, finetuning from COCO" 64 | -------------------------------------------------------------------------------- /configs/panoptic_2gpu_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: True 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleConvSumCombineScales" 37 | CONV_HEAD_DIM: 128 38 | UPSAMPLE_MODULE: "One3x3ReLU" 39 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 40 | COLLAPSE_THING_ONTOLOGY: False 41 | UPSAMPLE_METHOD: "bilinear" 42 | PANOPTIC: 43 | INSTANCE_WEIGHT: 1.0 44 | SEMANTIC_WEIGHT: 0.5 45 | MASK_ON: True 46 | DATASETS: 47 | TRAIN: 
("panoptic_coco_2017_train",) 48 | TEST: ("panoptic_coco_2017_val",) 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | IMS_PER_BATCH: 4 53 | BASE_LR: 0.005 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (240000, 320000) 56 | MAX_ITER: 360000 57 | TEST: 58 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 59 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 60 | PREDICTION_PATHS: [] 61 | TEMPORARY_DIR: "/scratch/tmp" 62 | NAME: "prime_rpn" 63 | DESCRIPTION: "priming the RPN with residual semantic features 2GPU" 64 | -------------------------------------------------------------------------------- /configs/panoptic_2gpu_mask_rcnn_R_50_FPN_1x_test_dev.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: False 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | COMPUTE_CC_RESULTS: False 45 | MASK_ON: True 46 | ORDER_ON: True 47 | WEIGHT: "/mnt/cube/jlazarow/experiments/panoptic/baseline/03062019_232440/model_final.pth" 48 | DATASETS: 49 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 50 | TEST: ("panoptic_coco_2017_test_dev",) 51 | DATALOADER: 52 | SIZE_DIVISIBILITY: 32 53 | SOLVER: 54 | BASE_LR: 0.02 55 | WEIGHT_DECAY: 0.0001 56 | STEPS: (60000, 80000) 57 | MAX_ITER: 90000 58 | TEST: 59 | CHUNK_PREDICTIONS: True 60 | PREDICTION_PATHS: [] 61 | IMS_PER_BATCH: 16 62 | TEMPORARY_DIR: "/local/tmp" 63 | NAME: "order" 64 | DESCRIPTION: "dense ontology" 65 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 
0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" # "UpsampleConvSumCombineScales" # 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: False 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | MASK_ON: True 45 | DATASETS: 46 | TRAIN: ("panoptic_coco_2017_train",) 47 | TEST: ("panoptic_coco_2017_val",) 48 | DATALOADER: 49 | SIZE_DIVISIBILITY: 32 50 | SOLVER: 51 | IMS_PER_BATCH: 8 52 | BASE_LR: 0.01 53 | WEIGHT_DECAY: 0.0001 54 | STEPS: (120000, 160000) 55 | MAX_ITER: 180000 56 | TEMPORARY_DIR: "/media/data/jlazarow" 57 | NAME: "baseline" 58 | DESCRIPTION: "dense ontology" 59 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_fusion.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4WithScorePredictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: False 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | MASK_ON: True 45 | FUSION_ON: True 46 | DATASETS: 47 | TRAIN: ("panoptic_coco_2017_train",) 48 | TEST: ("panoptic_coco_2017_val",) 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | IMS_PER_BATCH: 8 53 | BASE_LR: 0.01 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (120000, 160000) 56 | MAX_ITER: 180000 57 | TEMPORARY_DIR: "/media/data/jlazarow" 58 | NAME: "fusion" 59 | DESCRIPTION: "dense ontology, fusion e2e" 60 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_order.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 
64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | ROI_ORDER_HEAD: 35 | OVERLAP_THRESHOLD: 0.10 36 | BATCH_SIZE_PER_IMAGE: 256 37 | SHARE_MASK_FEATURE_EXTRACTOR: False 38 | ENSURE_CONSISTENCY: True 39 | #ONLY_TRAIN: True 40 | WEIGHT: 1.0 41 | SEMANTIC: 42 | USE_FPN: True 43 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 44 | UPSAMPLE_MODULE: "One3x3ReLU" 45 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 46 | COLLAPSE_THING_ONTOLOGY: False 47 | UPSAMPLE_METHOD: "bilinear" 48 | PANOPTIC: 49 | INSTANCE_WEIGHT: 1.0 50 | SEMANTIC_WEIGHT: 0.5 51 | FUSION: 52 | CONFIDENCE_THRESHOLD: 0.5 53 | OVERLAP_THRESHOLD: 0.5 54 | STUFF_MINIMUM_AREA: 4096 55 | MASK_ON: True 56 | ORDER_ON: False 57 | WEIGHT: "/local/experiments/panoptic/order/06262019_200346/model_final.pth" #"/mnt/cube/jlazarow/experiments/panoptic/baseline/02112019_151538/model_final.pth" # "/local/experiments/panoptic/order/06222019_165802/model_final.pth" "/local/experiments/panoptic/order/06232019_143107/model_final.pth" # "/local/experiments/panoptic/order/06262019_111528/model_final.pth" # 58 | DATASETS: 59 | TRAIN: ("panoptic_coco_2017_train",) 60 | TEST: ("panoptic_coco_2017_val",) 61 | DATALOADER: 62 | SIZE_DIVISIBILITY: 32 63 | SOLVER: 64 | IMS_PER_BATCH: 4 65 | BASE_IMS_PER_BATCH: 8 66 | BASE_LR: 0.01 67 | WEIGHT_DECAY: 0.0001 68 | STEPS: (60000, 120000) 69 | MAX_ITER: 91000 70 | TEST: 71 | PREDICTION_PATHS: ["/local/experiments/panoptic/order/06262019_205807/inference/panoptic_coco_2017_val/predictions.pth"] #"/local/experiments/panoptic/order/06262019_132748/inference/panoptic_coco_2017_val/predictions.pth"] #"/local/experiments/panoptic/order/06252019_143058/inference/panoptic_coco_2017_val/predictions.pth"] 72 | TEMPORARY_DIR: "/local/tmp/" 73 | NAME: "order" 74 | DESCRIPTION: "trying to fix everything but order head" 75 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_order_nodense.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | 
PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: True 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | MASK_ON: True 45 | ORDER_ON: True 46 | DATASETS: 47 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 48 | TEST: ("panoptic_coco_2017_val",) 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | IMS_PER_BATCH: 8 53 | BASE_LR: 0.01 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (120000, 160000) 56 | MAX_ITER: 180000 57 | TEMPORARY_DIR: "/media/data/jlazarow" 58 | NAME: "order" 59 | DESCRIPTION: "dense ontology, order e2e" 60 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: True 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleConvSumCombineScales" 37 | CONV_HEAD_DIM: 128 38 | UPSAMPLE_MODULE: "One3x3ReLU" 39 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 40 | COLLAPSE_THING_ONTOLOGY: False 41 | UPSAMPLE_METHOD: "bilinear" 42 | PANOPTIC: 43 | INSTANCE_WEIGHT: 1.0 44 | SEMANTIC_WEIGHT: 0.5 45 | MASK_ON: True 46 | DATASETS: 47 | TRAIN: ("panoptic_coco_2017_train",) 48 | TEST: ("panoptic_coco_2017_val",) 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | IMS_PER_BATCH: 8 53 | BASE_LR: 0.01 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (120000, 160000) 56 | MAX_ITER: 180000 57 | TEST: 58 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 59 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 60 | PREDICTION_PATHS: [] 61 | TEMPORARY_DIR: "/scratch/tmp" 62 | NAME: "prime_rpn" 63 | DESCRIPTION: "priming the RPN with residual semantic features 2GPU" 64 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_mask_rcnn_R_101_FPN_1x_test_dev.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | 
USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | ROI_ORDER_HEAD: 35 | BATCH_SIZE_PER_IMAGE: 128 36 | SHARE_MASK_FEATURE_EXTRACTOR: False 37 | ENSURE_CONSISTENCY: True 38 | WEIGHT: 1.0 39 | SEMANTIC: 40 | USE_FPN: True 41 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 42 | UPSAMPLE_MODULE: "One3x3ReLU" 43 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 44 | COLLAPSE_THING_ONTOLOGY: True 45 | UPSAMPLE_METHOD: "bilinear" 46 | PANOPTIC: 47 | INSTANCE_WEIGHT: 1.0 48 | SEMANTIC_WEIGHT: 0.5 49 | COMPUTE_CC_RESULTS: False 50 | COMPUTE_PRE_RESULTS: False 51 | MASK_ON: True 52 | ORDER_ON: True 53 | WEIGHT: "/mnt/cube/kwl042/panoptic_exps/panoptic/baseline/03092019_234338/model_0180000.pth" #03092019_234338/model_0195200.pth" #"/mnt/cube/jlazarow/experiments/panoptic/baseline/03062019_232440/model_final.pth" #/local/experiments/panoptic/order/03142019_125645/model_final.pth 54 | DATASETS: 55 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 56 | TEST: ("panoptic_coco_2017_val",) 57 | DATALOADER: 58 | SIZE_DIVISIBILITY: 32 59 | SOLVER: 60 | IMS_PER_BATCH: 8 61 | BASE_LR: 0.01 62 | WEIGHT_DECAY: 0.0001 63 | STEPS: (120000, 160000) #(60000, 80000) 64 | MAX_ITER: 185000 #90000 65 | TEST: 66 | CHUNK_PREDICTIONS: True 67 | PREDICTION_PATHS: [] 68 | IMS_PER_BATCH: 8 69 | TEMPORARY_DIR: "/local/tmp" 70 | NAME: "order" 71 | DESCRIPTION: "dense ontology, R101" 72 | -------------------------------------------------------------------------------- /configs/panoptic_4gpu_mask_rcnn_R_50_FPN_1x_test_dev.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: True 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | 
INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | COMPUTE_CC_RESULTS: False 45 | MASK_ON: True 46 | ORDER_ON: True 47 | WEIGHT: "/mnt/cube/kwl042/panoptic_exps/panoptic/baseline/03092019_234338/model_0195200.pth" #"/mnt/cube/jlazarow/experiments/panoptic/baseline/03062019_232440/model_final.pth" 48 | DATASETS: 49 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 50 | TEST: ("panoptic_coco_2017_test_dev",) 51 | DATALOADER: 52 | SIZE_DIVISIBILITY: 32 53 | SOLVER: 54 | BASE_LR: 0.02 55 | WEIGHT_DECAY: 0.0001 56 | STEPS: (60000, 80000) 57 | MAX_ITER: 90000 58 | TEST: 59 | CHUNK_PREDICTIONS: True 60 | PREDICTION_PATHS: [] 61 | IMS_PER_BATCH: 16 62 | TEMPORARY_DIR: "/local/tmp" 63 | NAME: "order" 64 | DESCRIPTION: "dense ontology" 65 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_101_FPN_1x_order.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | ROI_ORDER_HEAD: 35 | BATCH_SIZE_PER_IMAGE: 128 36 | SHARE_MASK_FEATURE_EXTRACTOR: False 37 | ENSURE_CONSISTENCY: True 38 | WEIGHT: 1.0 39 | SEMANTIC: 40 | USE_FPN: True 41 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 42 | UPSAMPLE_MODULE: "One3x3ReLU" 43 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 44 | COLLAPSE_THING_ONTOLOGY: True 45 | UPSAMPLE_METHOD: "bilinear" 46 | PANOPTIC: 47 | INSTANCE_WEIGHT: 1.0 48 | SEMANTIC_WEIGHT: 0.5 49 | COMPUTE_CC_RESULTS: False 50 | MASK_ON: True 51 | ORDER_ON: True 52 | WEIGHT: "/mnt/cube/kwl042/panoptic_exps/panoptic/baseline/03092019_234338/model_0180000.pth" 53 | DATASETS: 54 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 55 | TEST: ("panoptic_coco_2017_val",) 56 | DATALOADER: 57 | SIZE_DIVISIBILITY: 32 58 | SOLVER: 59 | IMS_PER_BATCH: 8 60 | BASE_LR: 0.02 61 | WEIGHT_DECAY: 0.0001 62 | STEPS: (60000, 80000) 63 | MAX_ITER: 90000 64 | TEST: 65 | IMS_PER_BATCH: 8 66 | PREDICTION_PATHS: [] 67 | TEMPORARY_DIR: "/local/tmp" 68 | NAME: "order" 69 | DESCRIPTION: "collapsed ontology, R101, order own features" 70 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | 
POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | ROI_HEADS: 17 | USE_FPN: True 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | ROI_MASK_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 27 | PREDICTOR: "MaskRCNNC4Predictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 28 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | SEMANTIC: 33 | USE_FPN: True 34 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 35 | UPSAMPLE_MODULE: "One3x3ReLU" 36 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 37 | COLLAPSE_THING_ONTOLOGY: False 38 | UPSAMPLE_METHOD: "bilinear" 39 | PANOPTIC: 40 | INSTANCE_WEIGHT: 1.0 41 | SEMANTIC_WEIGHT: 0.5 42 | MASK_ON: True 43 | DATASETS: 44 | TRAIN: ("panoptic_coco_2017_train",) 45 | TEST: ("panoptic_coco_2017_val",) 46 | DATALOADER: 47 | SIZE_DIVISIBILITY: 32 48 | SOLVER: 49 | BASE_LR: 0.02 50 | WEIGHT_DECAY: 0.0001 51 | STEPS: (60000, 80000) 52 | MAX_ITER: 90000 53 | TEST: 54 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 55 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 56 | PREDICTION_PATHS: [] #"/mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth"] 57 | TEMPORARY_DIR: "/media/data/jlazarow" 58 | NAME: "baseline" 59 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_fusion.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4WithScorePredictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: False 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | MASK_ON: True 45 | FUSION_ON: True 46 | DATASETS: 47 | TRAIN: ("panoptic_coco_2017_train",) 48 | TEST: ("panoptic_coco_2017_tiny",) 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | BASE_LR: 0.02 53 | WEIGHT_DECAY: 0.0001 54 | STEPS: (60000, 80000) 55 | MAX_ITER: 90000 56 | TEST: 57 | PREDICTION_PATHS: [] 58 | TEMPORARY_DIR: "/scratch/tmp" 59 | NAME: "fusion" 60 | DESCRIPTION: "dense ontology, fine tuning" 61 | 
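These YAML files are deltas over the defaults in maskrcnn_benchmark/config/defaults.py; only the keys they list are overridden. A minimal sketch of how a config like the one above is consumed, assuming the stock maskrcnn-benchmark entry points (script and helper names may differ slightly in this fork):

from maskrcnn_benchmark.config import cfg  # yacs node assembled in config/defaults.py
from maskrcnn_benchmark.modeling.detector import build_detection_model

cfg.merge_from_file("configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_fusion.yaml")
cfg.merge_from_list(["SOLVER.IMS_PER_BATCH", 4])  # command-line style overrides
cfg.freeze()
model = build_detection_model(cfg)  # instantiates the "PanopticFPN" meta-architecture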
-------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_order.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | USE_SEMANTIC_FEATURES: True 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | ROI_MASK_HEAD: 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | ROI_ORDER_HEAD: 36 | BATCH_SIZE_PER_IMAGE: 128 37 | SHARE_MASK_FEATURE_EXTRACTOR: False 38 | ENSURE_CONSISTENCY: True 39 | WEIGHT: 1.0 40 | SEMANTIC: 41 | USE_FPN: True 42 | SEMANTIC_HEAD: "UpsampleConvSumCombineScales" #"UpsampleEqualSizeConvSum" 43 | UPSAMPLE_MODULE: "One3x3ReLU" 44 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 45 | COLLAPSE_THING_ONTOLOGY: False 46 | UPSAMPLE_METHOD: "bilinear" 47 | PANOPTIC: 48 | INSTANCE_WEIGHT: 1.0 49 | SEMANTIC_WEIGHT: 0.5 50 | COMPUTE_CC_RESULTS: False 51 | COMPUTE_PRE_RESULTS: False 52 | MASK_ON: True 53 | ORDER_ON: True 54 | DATASETS: 55 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 56 | TEST: ("panoptic_coco_2017_val",) 57 | DATALOADER: 58 | SIZE_DIVISIBILITY: 32 59 | SOLVER: 60 | BASE_LR: 0.02 61 | WEIGHT_DECAY: 0.0001 62 | STEPS: (60000, 80000) 63 | MAX_ITER: 90000 64 | TEST: 65 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 66 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 67 | PREDICTION_PATHS: [] #["/mnt/cube/jlazarow/experiments/panoptic/baseline/03072019_184209/inference/panoptic_coco_2017_val/predictions.pth"] 68 | TEMPORARY_DIR: "/local/tmp" 69 | NAME: "baseline" 70 | DESCRIPTION: "dense ontology, trying 0.6 confidence." 
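A pattern worth noting across these solver blocks: the lower-GPU variants follow the linear scaling rule, halving IMS_PER_BATCH from the 16-image default while halving BASE_LR and doubling STEPS/MAX_ITER (0.02 / (60k, 80k) / 90k at batch 16 versus 0.01 / (120k, 160k) / 180k at batch 8). A small illustrative helper (hypothetical, not part of this repo) that reproduces the rescaling:

def rescale_schedule(base_lr, steps, max_iter, old_batch, new_batch):
    # linear scaling rule: LR scales with batch size, iteration counts inversely
    factor = new_batch / old_batch
    return (base_lr * factor,
            tuple(int(s / factor) for s in steps),
            int(max_iter / factor))

rescale_schedule(0.02, (60000, 80000), 90000, old_batch=16, new_batch=8)
# -> (0.01, (120000, 160000), 180000)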
71 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_order_test.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: False 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | COMPUTE_CC_RESULTS: False 45 | MASK_ON: True 46 | ORDER_ON: True 47 | DATASETS: 48 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 49 | TEST: ("panoptic_coco_2017_test_dev",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | BASE_LR: 0.02 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (60000, 80000) 56 | MAX_ITER: 90000 57 | TEST: 58 | CHUNK_PREDICTIONS: True 59 | PREDICTION_PATHS: [["/local/experiments/panoptic/order/03112019_222258/inference/panoptic_coco_2017_test_dev/file0.pth", "/local/experiments/panoptic/order/03112019_222258/inference/panoptic_coco_2017_test_dev/file1.pth", "/local/experiments/panoptic/order/03112019_222258/inference/panoptic_coco_2017_test_dev/file2.pth", "/local/experiments/panoptic/order/03112019_222258/inference/panoptic_coco_2017_test_dev/file3.pth"]] 60 | #PREDICTION_PATHS: [] #"/local/experiments/panoptic/03102019_151246/inference/panoptic_coco_2017_test_dev/predictions.pth"] 61 | TEMPORARY_DIR: "/local/tmp" 62 | NAME: "baseline" 63 | DESCRIPTION: "for testdev from panoptic fusion baseline" 64 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_order_val.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: False 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 
28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | SEMANTIC: 35 | USE_FPN: True 36 | SEMANTIC_HEAD: "UpsampleEqualSizeConvSum" 37 | UPSAMPLE_MODULE: "One3x3ReLU" 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | COLLAPSE_THING_ONTOLOGY: False 40 | UPSAMPLE_METHOD: "bilinear" 41 | PANOPTIC: 42 | INSTANCE_WEIGHT: 1.0 43 | SEMANTIC_WEIGHT: 0.5 44 | COMPUTE_CC_RESULTS: False 45 | MASK_ON: True 46 | ORDER_ON: True 47 | DATASETS: 48 | TRAIN: ("panoptic_coco_2017_train_overlap02",) 49 | TEST: ("panoptic_coco_2017_val_overlap02",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | BASE_LR: 0.02 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (60000, 80000) 56 | MAX_ITER: 90000 57 | TEST: 58 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 59 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 60 | PREDICTION_PATHS: [] #"/mnt/cube/jlazarow/experiments/panoptic/baseline/03072019_184209/inference/panoptic_coco_2017_val/predictions.pth"] 61 | FEED_GROUND_TRUTH_INSTANCES: True 62 | ORDER_ONLY: True 63 | TEMPORARY_DIR: "/scratch/tmp" 64 | NAME: "baseline" 65 | DESCRIPTION: "dense ontology" 66 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.py: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: True 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | USE_SEMANTIC_FEATURES: True 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | ROI_MASK_HEAD: 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | SEMANTIC: 36 | USE_FPN: True 37 | SEMANTIC_HEAD: "UpsampleConvSumCombineScales" 38 | CONV_HEAD_DIM: 128 39 | UPSAMPLE_MODULE: "One3x3ReLU" 40 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 41 | COLLAPSE_THING_ONTOLOGY: False 42 | UPSAMPLE_METHOD: "bilinear" 43 | PANOPTIC: 44 | INSTANCE_WEIGHT: 1.0 45 | SEMANTIC_WEIGHT: 0.5 46 | MASK_ON: True 47 | DATASETS: 48 | TRAIN: ("panoptic_coco_2017_train",) 49 | TEST: ("panoptic_coco_2017_val",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | IMS_PER_BATCH: 8 54 | BASE_LR: 0.01 55 | WEIGHT_DECAY: 0.0001 56 | STEPS: (120000, 160000) 57 | MAX_ITER: 180000 58 | TEST: 59 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 60 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 61 | PREDICTION_PATHS: [] 62 | TEMPORARY_DIR: "/media/data/jlazarow" 63 
| NAME: "prime_rpn_roi" 64 | DESCRIPTION: "priming the RPN and ROI heads with residual semantic features 4GPU" 65 | -------------------------------------------------------------------------------- /configs/panoptic_e2e_mask_rcnn_R_50_FPN_1x_semantic_residual.yaml: -------------------------------------------------------------------------------- 1 | TASK: 2 | KIND: "Panoptic" 3 | MODEL: 4 | META_ARCHITECTURE: "PanopticFPN" 5 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN" 8 | OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TRAIN: 2000 13 | PRE_NMS_TOP_N_TEST: 1000 14 | POST_NMS_TOP_N_TEST: 1000 15 | FPN_POST_NMS_TOP_N_TEST: 1000 16 | USE_SEMANTIC_FEATURES: True 17 | RPN_HEAD: "SingleConvRPNHead" 18 | ROI_HEADS: 19 | USE_FPN: True 20 | USE_SEMANTIC_FEATURES: True 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | ROI_MASK_HEAD: 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | SEMANTIC: 36 | USE_FPN: True 37 | SEMANTIC_HEAD: "UpsampleConvSumCombineScales" 38 | CONV_HEAD_DIM: 128 39 | UPSAMPLE_MODULE: "One3x3ReLU" 40 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 41 | COLLAPSE_THING_ONTOLOGY: False 42 | UPSAMPLE_METHOD: "bilinear" 43 | PANOPTIC: 44 | INSTANCE_WEIGHT: 1.0 45 | SEMANTIC_WEIGHT: 0.5 46 | MASK_ON: True 47 | DATASETS: 48 | TRAIN: ("panoptic_coco_2017_train",) 49 | TEST: ("panoptic_coco_2017_tiny",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | BASE_LR: 0.02 54 | WEIGHT_DECAY: 0.0001 55 | STEPS: (60000, 80000) 56 | MAX_ITER: 90000 57 | TEST: 58 | # nearest: /mnt/cube/jlazarow/experiments/02042019_112833/inference/panoptic_coco_2017_val/predictions.pth 59 | # bilinear: /mnt/cube/jlazarow/experiments/02082019_110113/inference/panoptic_coco_2017_val/predictions.pth 60 | PREDICTION_PATHS: [] #"/mnt/cube/jlazarow/experiments/panoptic/prime_rpn_roi/02192019_182845/inference/panoptic_coco_2017_val/predictions.pth"] 61 | TEMPORARY_DIR: "/scratch/tmp" 62 | NAME: "prime_rpn" 63 | DESCRIPTION: "priming the RPN with residual semantic features" 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .defaults import _C as cfg 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
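// Dispatch pattern shared by all of the csrc headers below: each
// Python-facing function checks the device of its input tensor, routes CUDA
// tensors to the corresponding *_cuda kernel when the extension was built
// with WITH_CUDA, and otherwise falls back to the CPU implementation (or
// raises AT_ERROR where no CPU kernel exists, e.g. the backward passes).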
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
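// Greedy non-maximum suppression on the CPU: boxes are visited in descending
// score order, and each surviving box suppresses every later box whose IoU
// with it is >= threshold; the indices of the kept boxes are returned. The
// "+ 1" in the area/intersection arithmetic follows the legacy integer pixel
// convention used throughout this codebase.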
2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 14 | const at::Tensor& rois, 15 | const float spatial_scale, 16 | const int pooled_height, 17 | const int pooled_width, 18 | const int batch_size, 19 | const int channels, 20 | const int height, 21 | const int width, 22 | const int sampling_ratio); 23 | 24 | 25 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(const at::Tensor& input, 26 | const at::Tensor& rois, 27 | const float spatial_scale, 28 | const int pooled_height, 29 | const int pooled_width); 30 | 31 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 32 | const at::Tensor& input, 33 | const at::Tensor& rois, 34 | const at::Tensor& argmax, 35 | const float spatial_scale, 36 | const int pooled_height, 37 | const int pooled_width, 38 | const int batch_size, 39 | const int channels, 40 | const int height, 41 | const int width); 42 | 43 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 44 | 45 | int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, 46 | at::Tensor offset, at::Tensor output, 47 | at::Tensor columns, at::Tensor ones, int kW, 48 | int kH, int dW, int dH, int padW, int padH, 49 | int dilationW, int dilationH, int group, 50 | int deformable_group, int im2col_step); 51 | 52 | int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, 53 | at::Tensor gradOutput, at::Tensor gradInput, 54 | at::Tensor gradOffset, at::Tensor weight, 55 | at::Tensor columns, int kW, int kH, int dW, 56 | int dH, int padW, int padH, int dilationW, 57 | int dilationH, int group, 58 | int deformable_group, int im2col_step); 59 | 60 | int deform_conv_backward_parameters_cuda( 61 | at::Tensor input, at::Tensor offset, at::Tensor gradOutput, 62 | at::Tensor gradWeight, // at::Tensor gradBias, 63 | at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, 64 | int padW, int padH, int dilationW, int dilationH, int group, 65 | int deformable_group, float scale, int im2col_step); 66 | 67 | void modulated_deform_conv_cuda_forward( 68 | at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, 69 | at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, 70 | int kernel_h, int kernel_w, const int stride_h, const int stride_w, 71 | const int pad_h, const int pad_w, const int dilation_h, 72 | const int dilation_w, const int group, const int deformable_group, 73 | const bool with_bias); 74 | 75 | void modulated_deform_conv_cuda_backward( 76 | at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, 77 | at::Tensor offset, at::Tensor mask, at::Tensor columns, 78 | at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, 79 | at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, 80 | int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, 81 | int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, 82 | const bool with_bias); 83 | 84 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 85 | const int height, 86 | const int width); 87 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/deform_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | int deform_conv_forward( 12 | at::Tensor input, 13 | at::Tensor weight, 14 | at::Tensor offset, 15 | at::Tensor output, 16 | at::Tensor columns, 17 | at::Tensor ones, 18 | int kW, 19 | int kH, 20 | int dW, 21 | int dH, 22 | int padW, 23 | int padH, 24 | int dilationW, 25 | int dilationH, 26 | int group, 27 | int deformable_group, 28 | int im2col_step) 29 | { 30 | if (input.type().is_cuda()) { 31 | #ifdef WITH_CUDA 32 | return deform_conv_forward_cuda( 33 | input, weight, offset, output, columns, ones, 34 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 35 | group, deformable_group, im2col_step 36 | ); 37 | #else 38 | AT_ERROR("Not compiled with GPU support"); 39 | #endif 40 | } 41 | AT_ERROR("Not implemented on the CPU"); 42 | } 43 | 44 | 45 | int deform_conv_backward_input( 46 | at::Tensor input, 47 | at::Tensor offset, 48 | at::Tensor gradOutput, 49 | at::Tensor gradInput, 50 | at::Tensor gradOffset, 51 | at::Tensor weight, 52 | at::Tensor columns, 53 | int kW, 54 | int kH, 55 | int dW, 56 | int dH, 57 | int padW, 58 | int padH, 59 | int dilationW, 60 | int dilationH, 61 | int group, 62 | int deformable_group, 63 | int im2col_step) 64 | { 65 | if (input.type().is_cuda()) { 66 | #ifdef WITH_CUDA 67 | return deform_conv_backward_input_cuda( 68 | input, offset, gradOutput, gradInput, gradOffset, weight, columns, 69 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 70 | group, deformable_group, im2col_step 71 | ); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | AT_ERROR("Not implemented on the CPU"); 77 | } 78 | 79 | 80 | int deform_conv_backward_parameters( 81 | at::Tensor input, 82 | at::Tensor offset, 83 | at::Tensor gradOutput, 84 | at::Tensor gradWeight, // at::Tensor gradBias, 85 | at::Tensor columns, 86 | at::Tensor ones, 87 | int kW, 88 | int kH, 89 | int dW, 90 | int dH, 91 | int padW, 92 | int padH, 93 | int dilationW, 94 | int dilationH, 95 | int group, 96 | int deformable_group, 97 | float scale, 98 | int im2col_step) 99 | { 100 | if (input.type().is_cuda()) { 101 | #ifdef WITH_CUDA 102 | return deform_conv_backward_parameters_cuda( 103 | input, offset, gradOutput, gradWeight, columns, ones, 104 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 105 | group, deformable_group, scale, im2col_step 106 | ); 107 | #else 108 | AT_ERROR("Not compiled with GPU support"); 109 | #endif 110 | } 111 | AT_ERROR("Not implemented on the CPU"); 112 | } 113 | 114 | 115 | void modulated_deform_conv_forward( 116 | at::Tensor input, 117 | at::Tensor weight, 118 | at::Tensor bias, 119 | at::Tensor ones, 120 | at::Tensor offset, 121 | at::Tensor mask, 122 | at::Tensor output, 123 | at::Tensor columns, 124 | int kernel_h, 125 | int kernel_w, 126 | const int stride_h, 127 | const int stride_w, 128 | const int pad_h, 129 | const int pad_w, 130 | const int dilation_h, 131 | const int dilation_w, 132 | const int group, 133 | const int deformable_group, 134 | const bool with_bias) 135 | { 136 | if (input.type().is_cuda()) { 137 | #ifdef WITH_CUDA 138 | return modulated_deform_conv_cuda_forward( 139 | input, weight, bias, ones, offset, mask, output, columns, 140 | kernel_h, kernel_w, stride_h, stride_w, 141 | pad_h, pad_w, dilation_h, dilation_w, 142 | group, deformable_group, with_bias 143 | ); 144 | #else 145 | AT_ERROR("Not compiled with GPU support"); 146 | #endif 147 | } 148 | AT_ERROR("Not 
implemented on the CPU"); 149 | } 150 | 151 | 152 | void modulated_deform_conv_backward( 153 | at::Tensor input, 154 | at::Tensor weight, 155 | at::Tensor bias, 156 | at::Tensor ones, 157 | at::Tensor offset, 158 | at::Tensor mask, 159 | at::Tensor columns, 160 | at::Tensor grad_input, 161 | at::Tensor grad_weight, 162 | at::Tensor grad_bias, 163 | at::Tensor grad_offset, 164 | at::Tensor grad_mask, 165 | at::Tensor grad_output, 166 | int kernel_h, 167 | int kernel_w, 168 | int stride_h, 169 | int stride_w, 170 | int pad_h, 171 | int pad_w, 172 | int dilation_h, 173 | int dilation_w, 174 | int group, 175 | int deformable_group, 176 | const bool with_bias) 177 | { 178 | if (input.type().is_cuda()) { 179 | #ifdef WITH_CUDA 180 | return modulated_deform_conv_cuda_backward( 181 | input, weight, bias, ones, offset, mask, columns, 182 | grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output, 183 | kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, 184 | group, deformable_group, with_bias 185 | ); 186 | #else 187 | AT_ERROR("Not compiled with GPU support"); 188 | #endif 189 | } 190 | AT_ERROR("Not implemented on the CPU"); 191 | } -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
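// This translation unit builds the compiled extension: pybind11 registers
// the NMS, ROIAlign, ROIPool, and deformable-convolution entry points under
// TORCH_EXTENSION_NAME, which the Python wrappers in maskrcnn_benchmark/layers
// import as the _C module.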
2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "deform_conv.h" 6 | 7 | 8 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 9 | m.def("nms", &nms, "non-maximum suppression"); 10 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 11 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 12 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 13 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 14 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 15 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 16 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 17 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 18 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 19 | } 20 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_data_loader 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.structures.image_list import to_image_list 3 | 4 | 5 | class BatchCollator(object): 6 | """ 7 | From a list of samples from the dataset, 8 | returns the batched images and targets. 9 | This should be passed to the DataLoader 10 | """ 11 | 12 | def __init__(self, size_divisible=0): 13 | self.size_divisible = size_divisible 14 | 15 | def __call__(self, batch): 16 | transposed_batch = list(zip(*batch)) 17 | images = to_image_list(transposed_batch[0], self.size_divisible) 18 | targets = transposed_batch[1] 19 | img_ids = transposed_batch[2] 20 | return images, targets, img_ids 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .coco import COCODataset, COCOTestDataset 3 | from .panoptic_cityscapes import PanopticCityscapesDataset 4 | from .panoptic_coco import PanopticCOCODataset 5 | from .voc import PascalVOCDataset 6 | from .concat_dataset import ConcatDataset 7 | 8 | __all__ = ["COCODataset", "PanopticCityscapesDataset", "PanopticCOCODataset", "ConcatDataset", "PascalVOCDataset", "COCOTestDataset"] 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the sizes of the image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.data import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .voc import voc_evaluation 5 | 6 | 7 | def evaluate(dataset, predictions, output_folder, **kwargs): 8 | """evaluate dataset using different methods based on dataset type. 9 | Args: 10 | dataset: Dataset object 11 | predictions(list[BoxList]): each item in the list represents the 12 | prediction results for one image. 13 | output_folder: output folder, to save evaluation files or results. 14 | **kwargs: other args. 15 | Returns: 16 | evaluation result 17 | """ 18 | args = dict( 19 | dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs 20 | ) 21 | 22 | # todo, change this at some point. 23 | if isinstance(dataset, (datasets.COCODataset, datasets.PanopticCOCODataset, datasets.COCOTestDataset, datasets.PanopticCityscapesDataset)): 24 | return coco_evaluation(**args) 25 | elif isinstance(dataset, datasets.PascalVOCDataset): 26 | return voc_evaluation(**args) 27 | else: 28 | dataset_name = dataset.__class__.__name__ 29 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 30 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation 2 | 3 | 4 | def coco_evaluation( 5 | dataset, 6 | predictions, 7 | output_folder, 8 | box_only, 9 | iou_types, 10 | expected_results, 11 | expected_results_sigma_tol, 12 | working_directory, 13 | save_panoptic_results, 14 | save_pre_results, 15 | panoptic_confidence_thresh, 16 | panoptic_overlap_thresh, 17 | panoptic_stuff_min_area): 18 | return do_coco_evaluation( 19 | dataset=dataset, 20 | predictions=predictions, 21 | box_only=box_only, 22 | output_folder=output_folder, 23 | iou_types=iou_types, 24 | expected_results=expected_results, 25 | expected_results_sigma_tol=expected_results_sigma_tol, 26 | working_directory=working_directory, 27 | save_panoptic_results=save_panoptic_results, 28 | save_pre_results=save_pre_results, 29 | panoptic_confidence_thresh=panoptic_confidence_thresh, 30 | panoptic_overlap_thresh=panoptic_overlap_thresh, 31 | panoptic_stuff_min_area=panoptic_stuff_min_area) 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .voc_eval import do_voc_evaluation 4 | 5 | 6 | def 
voc_evaluation(dataset, predictions, output_folder, box_only, **_): 7 | logger = logging.getLogger("maskrcnn_benchmark.inference") 8 | if box_only: 9 | logger.warning("voc evaluation doesn't support box_only, ignored.") 10 | logger.info("performing voc evaluation, ignored iou_types.") 11 | return do_voc_evaluation( 12 | dataset=dataset, 13 | predictions=predictions, 14 | output_folder=output_folder, 15 | logger=logger, 16 | ) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/list_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Simple dataset class that wraps a list of path names 4 | """ 5 | 6 | from PIL import Image 7 | 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList 9 | 10 | 11 | class ListDataset(object): 12 | def __init__(self, image_lists, transforms=None): 13 | self.image_lists = image_lists 14 | self.transforms = transforms 15 | 16 | def __getitem__(self, item): 17 | img = Image.open(self.image_lists[item]).convert("RGB") 18 | 19 | # dummy target 20 | w, h = img.size 21 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 22 | 23 | if self.transforms is not None: 24 | img, target = self.transforms(img, target) 25 | 26 | return img, target 27 | 28 | def __len__(self): 29 | return len(self.image_lists) 30 | 31 | def get_img_info(self, item): 32 | """ 33 | Return the image dimensions for the image, without 34 | loading and pre-processing it 35 | """ 36 | pass 37 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.utils.data 5 | from PIL import Image 6 | import sys 7 | 8 | if sys.version_info[0] == 2: 9 | import xml.etree.cElementTree as ET 10 | else: 11 | import xml.etree.ElementTree as ET 12 | 13 | 14 | from maskrcnn_benchmark.structures.bounding_box import BoxList 15 | 16 | 17 | class PascalVOCDataset(torch.utils.data.Dataset): 18 | 19 | CLASSES = ( 20 | "__background__ ", 21 | "aeroplane", 22 | "bicycle", 23 | "bird", 24 | "boat", 25 | "bottle", 26 | "bus", 27 | "car", 28 | "cat", 29 | "chair", 30 | "cow", 31 | "diningtable", 32 | "dog", 33 | "horse", 34 | "motorbike", 35 | "person", 36 | "pottedplant", 37 | "sheep", 38 | "sofa", 39 | "train", 40 | "tvmonitor", 41 | ) 42 | 43 | def __init__(self, data_dir, split, use_difficult=False, transforms=None): 44 | self.root = data_dir 45 | self.image_set = split 46 | self.keep_difficult = use_difficult 47 | self.transforms = transforms 48 | 49 | self._annopath = os.path.join(self.root, "Annotations", "%s.xml") 50 | self._imgpath = os.path.join(self.root, "JPEGImages", "%s.jpg") 51 | self._imgsetpath = os.path.join(self.root, "ImageSets", "Main", "%s.txt") 52 | 53 | with open(self._imgsetpath % self.image_set) as f: 54 | self.ids = f.readlines() 55 | self.ids = [x.strip("\n") for x in self.ids] 56 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 57 | 58 | cls = PascalVOCDataset.CLASSES 59 | self.class_to_ind = dict(zip(cls, range(len(cls)))) 60 | 61 | def __getitem__(self, index): 62 | img_id = self.ids[index] 63 | img = Image.open(self._imgpath % img_id).convert("RGB") 64 | 65 | target = self.get_groundtruth(index) 66 | target = target.clip_to_image(remove_empty=True) 67 | 68 | if self.transforms is not 
None: 69 | img, target = self.transforms(img, target) 70 | 71 | return img, target, index 72 | 73 | def __len__(self): 74 | return len(self.ids) 75 | 76 | def get_groundtruth(self, index): 77 | img_id = self.ids[index] 78 | anno = ET.parse(self._annopath % img_id).getroot() 79 | anno = self._preprocess_annotation(anno) 80 | 81 | height, width = anno["im_info"] 82 | target = BoxList(anno["boxes"], (width, height), mode="xyxy") 83 | target.add_field("labels", anno["labels"]) 84 | target.add_field("difficult", anno["difficult"]) 85 | return target 86 | 87 | def _preprocess_annotation(self, target): 88 | boxes = [] 89 | gt_classes = [] 90 | difficult_boxes = [] 91 | TO_REMOVE = 1 92 | 93 | for obj in target.iter("object"): 94 | difficult = int(obj.find("difficult").text) == 1 95 | if not self.keep_difficult and difficult: 96 | continue 97 | name = obj.find("name").text.lower().strip() 98 | bb = obj.find("bndbox") 99 | # Make pixel indexes 0-based 100 | # Refer to "https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py#L208-L211" 101 | box = [ 102 | bb.find("xmin").text, 103 | bb.find("ymin").text, 104 | bb.find("xmax").text, 105 | bb.find("ymax").text, 106 | ] 107 | bndbox = tuple( 108 | map(lambda x: x - TO_REMOVE, list(map(int, box))) 109 | ) 110 | 111 | boxes.append(bndbox) 112 | gt_classes.append(self.class_to_ind[name]) 113 | difficult_boxes.append(difficult) 114 | 115 | size = target.find("size") 116 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 117 | 118 | res = { 119 | "boxes": torch.tensor(boxes, dtype=torch.float32), 120 | "labels": torch.tensor(gt_classes), 121 | "difficult": torch.tensor(difficult_boxes), 122 | "im_info": im_info, 123 | } 124 | return res 125 | 126 | def get_img_info(self, index): 127 | img_id = self.ids[index] 128 | anno = ET.parse(self._annopath % img_id).getroot() 129 | size = anno.find("size") 130 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 131 | return {"height": im_info[0], "width": im_info[1]} 132 | 133 | def map_class_id_to_class_name(self, class_id): 134 | return PascalVOCDataset.CLASSES[class_id] 135 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | It is especially useful in conjunction with 13 | :class:`torch.nn.parallel.DistributedDataParallel`. 
In such case, each 14 | process can pass a DistributedSampler instance as a DataLoader sampler, 15 | and load a subset of the original dataset that is exclusive to it. 16 | .. note:: 17 | Dataset is assumed to be of constant size. 18 | Arguments: 19 | dataset: Dataset used for sampling. 20 | num_replicas (optional): Number of processes participating in 21 | distributed training. 22 | rank (optional): Rank of the current process within num_replicas. 23 | """ 24 | 25 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 26 | if num_replicas is None: 27 | if not dist.is_available(): 28 | raise RuntimeError("Requires distributed package to be available") 29 | num_replicas = dist.get_world_size() 30 | if rank is None: 31 | if not dist.is_available(): 32 | raise RuntimeError("Requires distributed package to be available") 33 | rank = dist.get_rank() 34 | self.dataset = dataset 35 | self.num_replicas = num_replicas 36 | self.rank = rank 37 | self.epoch = 0 38 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 39 | self.total_size = self.num_samples * self.num_replicas 40 | self.shuffle = shuffle  # use the flag passed by the caller 41 | 42 | def __iter__(self): 43 | if self.shuffle: 44 | # deterministically shuffle based on epoch 45 | g = torch.Generator() 46 | g.manual_seed(self.epoch) 47 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 48 | else: 49 | indices = torch.arange(len(self.dataset)).tolist() 50 | 51 | # add extra samples to make it evenly divisible 52 | indices += indices[: (self.total_size - len(indices))] 53 | assert len(indices) == self.total_size 54 | 55 | # subsample 56 | offset = self.num_samples * self.rank 57 | indices = indices[offset : offset + self.num_samples] 58 | assert len(indices) == self.num_samples 59 | 60 | return iter(indices) 61 | 62 | def __len__(self): 63 | return self.num_samples 64 | 65 | def set_epoch(self, epoch): 66 | self.epoch = epoch 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import itertools 3 | 4 | import torch 5 | from torch.utils.data.sampler import BatchSampler 6 | from torch.utils.data.sampler import Sampler 7 | 8 | 9 | class GroupedBatchSampler(BatchSampler): 10 | """ 11 | Wraps another sampler to yield a mini-batch of indices. 12 | It enforces that elements from the same group should appear in groups of batch_size. 13 | It also tries to provide mini-batches which follow an ordering which is 14 | as close as possible to the ordering from the original sampler. 15 | 16 | Arguments: 17 | sampler (Sampler): Base sampler. 18 | batch_size (int): Size of mini-batch.
19 | drop_uneven (bool): If ``True``, the sampler will drop the batches whose 20 | size is less than ``batch_size`` 21 | 22 | """ 23 | 24 | def __init__(self, sampler, group_ids, batch_size, drop_uneven=False): 25 | if not isinstance(sampler, Sampler): 26 | raise ValueError( 27 | "sampler should be an instance of " 28 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 29 | ) 30 | self.sampler = sampler 31 | self.group_ids = torch.as_tensor(group_ids) 32 | assert self.group_ids.dim() == 1 33 | self.batch_size = batch_size 34 | self.drop_uneven = drop_uneven 35 | 36 | self.groups = torch.unique(self.group_ids).sort(0)[0] 37 | 38 | self._can_reuse_batches = False 39 | 40 | def _prepare_batches(self): 41 | dataset_size = len(self.group_ids) 42 | # get the sampled indices from the sampler 43 | sampled_ids = torch.as_tensor(list(self.sampler)) 44 | # potentially not all elements of the dataset were sampled 45 | # by the sampler (e.g., DistributedSampler). 46 | # construct a tensor which contains -1 if the element was 47 | # not sampled, and a non-negative number indicating the 48 | # order where the element was sampled. 49 | # for example. if sampled_ids = [3, 1] and dataset_size = 5, 50 | # the order is [-1, 1, -1, 0, -1] 51 | order = torch.full((dataset_size,), -1, dtype=torch.int64) 52 | order[sampled_ids] = torch.arange(len(sampled_ids)) 53 | 54 | # get a mask with the elements that were sampled 55 | mask = order >= 0 56 | 57 | # find the elements that belong to each individual cluster 58 | clusters = [(self.group_ids == i) & mask for i in self.groups] 59 | # get relative order of the elements inside each cluster 60 | # that follows the order from the sampler 61 | relative_order = [order[cluster] for cluster in clusters] 62 | # with the relative order, find the absolute order in the 63 | # sampled space 64 | permutation_ids = [s[s.sort()[1]] for s in relative_order] 65 | # permute each cluster so that they follow the order from 66 | # the sampler 67 | permuted_clusters = [sampled_ids[idx] for idx in permutation_ids] 68 | 69 | # splits each cluster in batch_size, and merge as a list of tensors 70 | splits = [c.split(self.batch_size) for c in permuted_clusters] 71 | merged = tuple(itertools.chain.from_iterable(splits)) 72 | 73 | # now each batch internally has the right order, but 74 | # they are grouped by clusters. Find the permutation between 75 | # different batches that brings them as close as possible to 76 | # the order that we have in the sampler. 
For that, we will consider the 77 | # ordering as coming from the first element of each batch, and sort 78 | # correspondingly 79 | first_element_of_batch = [t[0].item() for t in merged] 80 | # get and inverse mapping from sampled indices and the position where 81 | # they occur (as returned by the sampler) 82 | inv_sampled_ids_map = {v: k for k, v in enumerate(sampled_ids.tolist())} 83 | # from the first element in each batch, get a relative ordering 84 | first_index_of_batch = torch.as_tensor( 85 | [inv_sampled_ids_map[s] for s in first_element_of_batch] 86 | ) 87 | 88 | # permute the batches so that they approximately follow the order 89 | # from the sampler 90 | permutation_order = first_index_of_batch.sort(0)[1].tolist() 91 | # finally, permute the batches 92 | batches = [merged[i].tolist() for i in permutation_order] 93 | 94 | if self.drop_uneven: 95 | kept = [] 96 | for batch in batches: 97 | if len(batch) == self.batch_size: 98 | kept.append(batch) 99 | batches = kept 100 | return batches 101 | 102 | def __iter__(self): 103 | if self._can_reuse_batches: 104 | batches = self._batches 105 | self._can_reuse_batches = False 106 | else: 107 | batches = self._prepare_batches() 108 | self._batches = batches 109 | return iter(batches) 110 | 111 | def __len__(self): 112 | if not hasattr(self, "_batches"): 113 | self._batches = self._prepare_batches() 114 | self._can_reuse_batches = True 115 | return len(self._batches) 116 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | flip_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 10 | else: 11 | min_size = cfg.INPUT.MIN_SIZE_TEST 12 | max_size = cfg.INPUT.MAX_SIZE_TEST 13 | flip_prob = 0 14 | 15 | to_bgr255 = cfg.INPUT.TO_BGR255 16 | normalize_transform = T.Normalize( 17 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 18 | ) 19 | 20 | transform = T.Compose( 21 | [ 22 | T.Resize(min_size, max_size), 23 | T.RandomHorizontalFlip(flip_prob), 24 | T.ToTensor(), 25 | normalize_transform, 26 | ] 27 | ) 28 | return transform 29 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import random 3 | 4 | import torch 5 | import torchvision 6 | from torchvision.transforms import functional as F 7 | 8 | 9 | class Compose(object): 10 | def __init__(self, transforms): 11 | self.transforms = transforms 12 | 13 | def __call__(self, image, target): 14 | for t in self.transforms: 15 | image, target = t(image, target) 16 | return image, target 17 | 18 | def __repr__(self): 19 | format_string = self.__class__.__name__ + "(" 20 | for t in self.transforms: 21 | format_string += "\n" 22 | format_string += " {0}".format(t) 23 | format_string += "\n)" 24 | return format_string 25 | 26 | """ 27 | class Resize(object): 28 | def __init__(self, min_size, max_size): 29 | self.min_size = min_size 30 | self.max_size = max_size 31 | 32 | # modified from torchvision to add support for max size 33 | def get_size(self, image_size): 34 | w, h = image_size 35 | size = self.min_size 36 | max_size = self.max_size 37 | if max_size is not None: 38 | min_original_size = float(min((w, h))) 39 | max_original_size = float(max((w, h))) 40 | if max_original_size / min_original_size * size > max_size: 41 | size = int(round(max_size * min_original_size / max_original_size)) 42 | 43 | if (w <= h and w == size) or (h <= w and h == size): 44 | return (h, w) 45 | 46 | if w < h: 47 | ow = size 48 | oh = int(size * h / w) 49 | else: 50 | oh = size 51 | ow = int(size * w / h) 52 | 53 | return (oh, ow) 54 | """ 55 | class Resize(object): 56 | def __init__(self, min_size, max_size): 57 | if not isinstance(min_size, (list, tuple)): 58 | min_size = (min_size,) 59 | self.min_size = min_size 60 | self.max_size = max_size 61 | 62 | # modified from torchvision to add support for max size 63 | def get_size(self, image_size): 64 | w, h = image_size 65 | size = random.choice(self.min_size) 66 | max_size = self.max_size 67 | """ 68 | if max_size is not None: 69 | min_original_size = float(min((w, h))) 70 | max_original_size = float(max((w, h))) 71 | if max_original_size / min_original_size * size > max_size: 72 | size = int(round(max_size * min_original_size / max_original_size)) 73 | """ 74 | if (w <= h and w == size) or (h <= w and h == size): 75 | return (h, w) 76 | 77 | if w < h: 78 | ow = size 79 | oh = int(size * h / w) 80 | else: 81 | oh = size 82 | ow = int(size * w / h) 83 | 84 | return (oh, ow) 85 | 86 | def __call__(self, image, target): 87 | size = self.get_size(image.size) 88 | image = F.resize(image, size) 89 | target = target.resize(image.size) 90 | return image, target 91 | 92 | class 
RandomHorizontalFlip(object): 93 | def __init__(self, prob=0.5): 94 | self.prob = prob 95 | 96 | def __call__(self, image, target): 97 | if random.random() < self.prob: 98 | image = F.hflip(image) 99 | target = target.transpose(0) 100 | return image, target 101 | 102 | 103 | class ToTensor(object): 104 | def __call__(self, image, target): 105 | return F.to_tensor(image), target 106 | 107 | 108 | class Normalize(object): 109 | def __init__(self, mean, std, to_bgr255=True): 110 | self.mean = mean 111 | self.std = std 112 | self.to_bgr255 = to_bgr255 113 | 114 | def __call__(self, image, target): 115 | if self.to_bgr255: 116 | image = image[[2, 1, 0]] * 255 117 | image = F.normalize(image, mean=self.mean, std=self.std) 118 | return image, target 119 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d 5 | from .misc import Conv2d 6 | from .misc import ConvTranspose2d 7 | from .misc import interpolate 8 | from .nms import nms 9 | from .roi_align import ROIAlign 10 | from .roi_align import roi_align 11 | from .roi_pool import ROIPool 12 | from .roi_pool import roi_pool 13 | from .smooth_l1_loss import smooth_l1_loss 14 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 15 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, ModulatedDeformConvPack 16 | 17 | __all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", 18 | "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate", 19 | "FrozenBatchNorm2d"] 20 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
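# note: this module JIT-compiles the C++/CUDA sources under csrc/ at import
# time via torch's cpp_extension loader; the rest of the codebase imports the
# prebuilt maskrcnn_benchmark._C instead (see layers/nms.py), so this appears
# to be a development-time fallback.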
2 | import glob 3 | import os.path 4 | 5 | import torch 6 | 7 | try: 8 | from torch.utils.cpp_extension import load as load_ext 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | except ImportError: 11 | raise ImportError("The cpp layer extensions require PyTorch 0.4 or higher") 12 | 13 | 14 | def _load_C_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | this_dir = os.path.dirname(this_dir) 17 | this_dir = os.path.join(this_dir, "csrc") 18 | 19 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 22 | 23 | source = main_file + source_cpu 24 | 25 | extra_cflags = [] 26 | if torch.cuda.is_available() and CUDA_HOME is not None: 27 | source.extend(source_cuda) 28 | extra_cflags = ["-DWITH_CUDA"] 29 | source = [os.path.join(this_dir, s) for s in source] 30 | extra_include_paths = [this_dir] 31 | return load_ext( 32 | "torchvision", 33 | source, 34 | extra_cflags=extra_cflags, 35 | extra_include_paths=extra_include_paths, 36 | ) 37 | 38 | 39 | _C = _load_C_extensions() 40 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/batch_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class FrozenBatchNorm2d(nn.Module): 7 | """ 8 | BatchNorm2d where the batch statistics and the affine parameters 9 | are fixed 10 | """ 11 | 12 | def __init__(self, n): 13 | super(FrozenBatchNorm2d, self).__init__() 14 | self.register_buffer("weight", torch.ones(n)) 15 | self.register_buffer("bias", torch.zeros(n)) 16 | self.register_buffer("running_mean", torch.zeros(n)) 17 | self.register_buffer("running_var", torch.ones(n)) 18 | 19 | def forward(self, x): 20 | scale = self.weight * self.running_var.rsqrt() 21 | bias = self.bias - self.running_mean * scale 22 | scale = scale.reshape(1, -1, 1, 1) 23 | bias = bias.reshape(1, -1, 1, 1) 24 | return x * scale + bias 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn) 3 | # 4 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from maskrcnn_benchmark import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppression""" 8 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
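# autograd wrapper around the custom ROIAlign operator: forward dispatches to
# the compiled _C.roi_align_forward, and backward only produces a gradient for
# the input feature map (the rois and the pooling parameters receive None).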
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIAlign(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None 45 | 46 | 47 | roi_align = _ROIAlign.apply 48 | 49 | 50 | class ROIAlign(nn.Module): 51 | def __init__(self, output_size, spatial_scale, sampling_ratio): 52 | super(ROIAlign, self).__init__() 53 | self.output_size = output_size 54 | self.spatial_scale = spatial_scale 55 | self.sampling_ratio = sampling_ratio 56 | 57 | def forward(self, input, rois): 58 | return roi_align( 59 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 60 | ) 61 | 62 | def __repr__(self): 63 | tmpstr = self.__class__.__name__ + "(" 64 | tmpstr += "output_size=" + str(self.output_size) 65 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 66 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 67 | tmpstr += ")" 68 | return tmpstr 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
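# same pattern as roi_align.py: _ROIPool saves the argmax locations from the
# forward pass so that _C.roi_pool_backward can route gradients back to the
# positions that were selected by the max pooling.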
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # TODO maybe push this to nn? 6 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 7 | """ 8 | very similar to the smooth_l1_loss from pytorch, but with 9 | the extra beta parameter 10 | """ 11 | n = torch.abs(input - target) 12 | cond = n < beta 13 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 14 | if size_average: 15 | return loss.mean() 16 | return loss.sum() 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/modeling/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .backbone import build_backbone 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
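# backbones are looked up in the BACKBONES registry by the string in
# cfg.MODEL.BACKBONE.CONV_BODY (e.g. "R-50-FPN"), so adding a new backbone only
# requires registering another build function here.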
2 | from collections import OrderedDict 3 | 4 | from torch import nn 5 | 6 | from maskrcnn_benchmark.modeling import registry 7 | from maskrcnn_benchmark.modeling.make_layers import conv_with_kaiming_uniform 8 | from . import fpn as fpn_module 9 | from . import resnet 10 | 11 | 12 | @registry.BACKBONES.register("R-50-C4") 13 | @registry.BACKBONES.register("R-50-C5") 14 | @registry.BACKBONES.register("R-101-C4") 15 | @registry.BACKBONES.register("R-101-C5") 16 | def build_resnet_backbone(cfg): 17 | body = resnet.ResNet(cfg) 18 | model = nn.Sequential(OrderedDict([("body", body)])) 19 | return model 20 | 21 | 22 | @registry.BACKBONES.register("R-50-FPN") 23 | @registry.BACKBONES.register("R-101-FPN") 24 | def build_resnet_fpn_backbone(cfg): 25 | body = resnet.ResNet(cfg) 26 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 27 | out_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS 28 | fpn = fpn_module.FPN( 29 | in_channels_list=[ 30 | in_channels_stage2, 31 | in_channels_stage2 * 2, 32 | in_channels_stage2 * 4, 33 | in_channels_stage2 * 8, 34 | ], 35 | out_channels=out_channels, 36 | conv_block=conv_with_kaiming_uniform( 37 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 38 | ), 39 | top_blocks=fpn_module.LastLevelMaxPool(), 40 | ) 41 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 42 | return model 43 | 44 | 45 | def build_backbone(cfg): 46 | assert cfg.MODEL.BACKBONE.CONV_BODY in registry.BACKBONES, \ 47 | "cfg.MODEL.BACKBONE.CONV_BODY: {} is not registered in the registry".format( 48 | cfg.MODEL.BACKBONE.CONV_BODY 49 | ) 50 | return registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY](cfg) 51 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | 7 | class FPN(nn.Module): 8 | """ 9 | Module that adds FPN on top of a list of feature maps. 10 | The feature maps are currently supposed to be in increasing depth 11 | order, and must be consecutive 12 | """ 13 | 14 | def __init__( 15 | self, in_channels_list, out_channels, conv_block, top_blocks=None 16 | ): 17 | """ 18 | Arguments: 19 | in_channels_list (list[int]): number of channels for each feature map that 20 | will be fed 21 | out_channels (int): number of channels of the FPN representation 22 | top_blocks (nn.Module or None): if provided, an extra operation will 23 | be performed on the output of the last (smallest resolution) 24 | FPN output, and the result will extend the result list 25 | """ 26 | super(FPN, self).__init__() 27 | self.inner_blocks = [] 28 | self.layer_blocks = [] 29 | for idx, in_channels in enumerate(in_channels_list, 1): 30 | inner_block = "fpn_inner{}".format(idx) 31 | layer_block = "fpn_layer{}".format(idx) 32 | inner_block_module = conv_block(in_channels, out_channels, 1) 33 | layer_block_module = conv_block(out_channels, out_channels, 3, 1) 34 | self.add_module(inner_block, inner_block_module) 35 | self.add_module(layer_block, layer_block_module) 36 | self.inner_blocks.append(inner_block) 37 | self.layer_blocks.append(layer_block) 38 | self.top_blocks = top_blocks 39 | 40 | def forward(self, x): 41 | """ 42 | Arguments: 43 | x (list[Tensor]): feature maps for each feature level. 44 | Returns: 45 | results (tuple[Tensor]): feature maps after FPN layers.
46 | They are ordered from highest resolution first. 47 | """ 48 | 49 | # the deepest layer gets the 1x1 -> 3x3 treatment. 50 | last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) 51 | results = [] 52 | results.append(getattr(self, self.layer_blocks[-1])(last_inner)) 53 | 54 | # ignore the last and reverse order (deepest to shallowest). 55 | for feature, inner_block, layer_block in zip( 56 | x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1] 57 | ): 58 | # nearest-neighbor upsampling of the top-down path. 59 | inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") 60 | 61 | # 1x1 treatment. 62 | inner_lateral = getattr(self, inner_block)(feature) 63 | # TODO use size instead of scale to make it robust to different sizes 64 | # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:], 65 | # mode='bilinear', align_corners=False) 66 | 67 | # add to resized top down. 68 | last_inner = inner_lateral + inner_top_down 69 | 70 | # 3x3 treatment. 71 | results.insert(0, getattr(self, layer_block)(last_inner)) 72 | 73 | if self.top_blocks is not None: 74 | # this should reference the 1x1 -> 3x3 treatment of the deepest block. 75 | # looks like this is usually just a max pool. 76 | last_results = self.top_blocks(results[-1]) 77 | results.extend(last_results) 78 | 79 | return tuple(results) 80 | 81 | 82 | class LastLevelMaxPool(nn.Module): 83 | def forward(self, x): 84 | return [F.max_pool2d(x, 1, 2, 0)] 85 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class BalancedPositiveNegativeSampler(object): 6 | """ 7 | This class samples batches, ensuring that they contain a fixed proportion of positives 8 | """ 9 | 10 | def __init__(self, batch_size_per_image, positive_fraction): 11 | """ 12 | Arguments: 13 | batch_size_per_image (int): number of elements to be selected per image 14 | positive_fraction (float): percentage of positive elements per batch 15 | """ 16 | self.batch_size_per_image = batch_size_per_image 17 | self.positive_fraction = positive_fraction 18 | 19 | def __call__(self, matched_idxs): 20 | """ 21 | Arguments: 22 | matched_idxs: list of tensors containing -1, 0 or positive values. 23 | Each tensor corresponds to a specific image. 24 | -1 values are ignored, 0 are considered as negatives and > 0 as 25 | positives. 26 | 27 | Returns: 28 | pos_idx (list[tensor]) 29 | neg_idx (list[tensor]) 30 | 31 | Returns two lists of binary masks for each image. 32 | The first list contains the positive elements that were selected, 33 | and the second list the negative examples.
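        For example (illustrative numbers, not taken from a config in this
        repository): with batch_size_per_image=512 and positive_fraction=0.25,
        at most int(512 * 0.25) = 128 positives are kept per image and the
        remaining slots are filled with negatives, fewer if not enough of
        either exist.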
34 | """ 35 | pos_idx = [] 36 | neg_idx = [] 37 | for matched_idxs_per_image in matched_idxs: 38 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 39 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 40 | 41 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = self.batch_size_per_image - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx_per_image = positive[perm1] 53 | neg_idx_per_image = negative[perm2] 54 | 55 | # create binary mask from indices 56 | pos_idx_per_image_mask = torch.zeros_like( 57 | matched_idxs_per_image, dtype=torch.uint8 58 | ) 59 | neg_idx_per_image_mask = torch.zeros_like( 60 | matched_idxs_per_image, dtype=torch.uint8 61 | ) 62 | pos_idx_per_image_mask[pos_idx_per_image] = 1 63 | neg_idx_per_image_mask[neg_idx_per_image] = 1 64 | 65 | pos_idx.append(pos_idx_per_image_mask) 66 | neg_idx.append(neg_idx_per_image_mask) 67 | 68 | return pos_idx, neg_idx 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class BoxCoder(object): 8 | """ 9 | This class encodes and decodes a set of bounding boxes into 10 | the representation used for training the regressors. 11 | """ 12 | 13 | def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)): 14 | """ 15 | Arguments: 16 | weights (4-element tuple) 17 | bbox_xform_clip (float) 18 | """ 19 | self.weights = weights 20 | self.bbox_xform_clip = bbox_xform_clip 21 | 22 | def encode(self, reference_boxes, proposals): 23 | """ 24 | Encode a set of proposals with respect to some 25 | reference boxes 26 | 27 | Arguments: 28 | reference_boxes (Tensor): reference boxes 29 | proposals (Tensor): boxes to be encoded 30 | """ 31 | 32 | TO_REMOVE = 1 # TODO remove 33 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 34 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 35 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 36 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 37 | 38 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 39 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 40 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 41 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 42 | 43 | wx, wy, ww, wh = self.weights 44 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 45 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 46 | targets_dw = ww * torch.log(gt_widths / ex_widths) 47 | targets_dh = wh * torch.log(gt_heights / ex_heights) 48 | 49 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 50 | return targets 51 | 52 | def decode(self, rel_codes, boxes): 53 | """ 54 | From a set of original boxes and encoded relative box offsets, 55 | get the decoded boxes. 56 | 57 | Arguments: 58 | rel_codes (Tensor): encoded boxes 59 | boxes (Tensor): reference boxes. 
60 | """ 61 | 62 | boxes = boxes.to(rel_codes.dtype) 63 | 64 | TO_REMOVE = 1 # TODO remove 65 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 66 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 67 | ctr_x = boxes[:, 0] + 0.5 * widths 68 | ctr_y = boxes[:, 1] + 0.5 * heights 69 | 70 | wx, wy, ww, wh = self.weights 71 | dx = rel_codes[:, 0::4] / wx 72 | dy = rel_codes[:, 1::4] / wy 73 | dw = rel_codes[:, 2::4] / ww 74 | dh = rel_codes[:, 3::4] / wh 75 | 76 | # Prevent sending too large values into torch.exp() 77 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 78 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 79 | 80 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 81 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 82 | pred_w = torch.exp(dw) * widths[:, None] 83 | pred_h = torch.exp(dh) * heights[:, None] 84 | 85 | pred_boxes = torch.zeros_like(rel_codes) 86 | # x1 87 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 88 | # y1 89 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 90 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 91 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 92 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 93 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 94 | 95 | return pred_boxes 96 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .detectors import build_detection_model 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/detectors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .generalized_rcnn import GeneralizedRCNN 3 | from .panoptic_fpn import PanopticFPN 4 | 5 | 6 | # we could probably get away with re-using GeneralizedRCNN for panoptic at some point. 7 | _DETECTION_META_ARCHITECTURES = { 8 | "GeneralizedRCNN": GeneralizedRCNN, 9 | "PanopticFPN": PanopticFPN 10 | } 11 | 12 | def build_detection_model(cfg): 13 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 14 | return meta_arch(cfg) 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/generalized_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Implements the Generalized R-CNN framework 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | 9 | from maskrcnn_benchmark.structures.image_list import to_image_list 10 | 11 | from ..backbone import build_backbone 12 | from ..rpn.rpn import build_rpn 13 | from ..roi_heads.roi_heads import build_roi_heads 14 | 15 | 16 | class GeneralizedRCNN(nn.Module): 17 | """ 18 | Main class for Generalized R-CNN. Currently supports boxes and masks. 19 | It consists of three main parts: 20 | - backbone 21 | - rpn 22 | - heads: takes the features + the proposals from the RPN and computes 23 | detections / masks from it. 
24 | """ 25 | 26 | def __init__(self, cfg): 27 | super(GeneralizedRCNN, self).__init__() 28 | 29 | self.backbone = build_backbone(cfg) 30 | self.rpn = build_rpn(cfg) 31 | self.roi_heads = build_roi_heads(cfg) 32 | 33 | def forward(self, images, targets=None): 34 | """ 35 | Arguments: 36 | images (list[Tensor] or ImageList): images to be processed 37 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 38 | 39 | Returns: 40 | result (list[BoxList] or dict[Tensor]): the output from the model. 41 | During training, it returns a dict[Tensor] which contains the losses. 42 | During testing, it returns list[BoxList] contains additional fields 43 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 44 | 45 | """ 46 | if self.training and targets is None: 47 | raise ValueError("In training mode, targets should be passed") 48 | 49 | # usually, it seems this is already an ImageList. 50 | images = to_image_list(images) 51 | features = self.backbone(images.tensors) 52 | 53 | # for panoptic FPN, it seems like we might need to disentangle this so we can 54 | # feed the features to the semantic head. 55 | proposals, proposal_losses = self.rpn(images, features, targets) 56 | if self.roi_heads: 57 | x, result, detector_losses = self.roi_heads(features, proposals, targets) 58 | else: 59 | # RPN-only models don't have roi_heads 60 | x = features 61 | result = proposals 62 | detector_losses = {} 63 | 64 | if self.training: 65 | losses = {} 66 | losses.update(detector_losses) 67 | losses.update(proposal_losses) 68 | return losses 69 | 70 | return result 71 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/make_layers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | from torch.nn import functional as F 9 | from maskrcnn_benchmark.config import cfg 10 | from maskrcnn_benchmark.layers import Conv2d 11 | from maskrcnn_benchmark.layers.misc import DFConv2d 12 | from maskrcnn_benchmark.modeling.poolers import Pooler 13 | 14 | 15 | def get_group_gn(dim, dim_per_gp, num_groups): 16 | """get number of groups used by GroupNorm, based on number of channels.""" 17 | assert dim_per_gp == -1 or num_groups == -1, \ 18 | "GroupNorm: can only specify G or C/G." 
19 | 20 | if dim_per_gp > 0: 21 | assert dim % dim_per_gp == 0, \ 22 | "dim: {}, dim_per_gp: {}".format(dim, dim_per_gp) 23 | group_gn = dim // dim_per_gp 24 | else: 25 | assert dim % num_groups == 0, \ 26 | "dim: {}, num_groups: {}".format(dim, num_groups) 27 | group_gn = num_groups 28 | 29 | return group_gn 30 | 31 | 32 | def group_norm(out_channels, affine=True, divisor=1): 33 | out_channels = out_channels // divisor 34 | dim_per_gp = cfg.MODEL.GROUP_NORM.DIM_PER_GP // divisor 35 | num_groups = cfg.MODEL.GROUP_NORM.NUM_GROUPS // divisor 36 | eps = cfg.MODEL.GROUP_NORM.EPSILON # default: 1e-5 37 | return torch.nn.GroupNorm( 38 | get_group_gn(out_channels, dim_per_gp, num_groups), 39 | out_channels, 40 | eps, 41 | affine 42 | ) 43 | 44 | 45 | def make_conv3x3( 46 | in_channels, 47 | out_channels, 48 | dilation=1, 49 | stride=1, 50 | use_gn=False, 51 | use_relu=False, 52 | kaiming_init=True 53 | ): 54 | conv = Conv2d( 55 | in_channels, 56 | out_channels, 57 | kernel_size=3, 58 | stride=stride, 59 | padding=dilation, 60 | dilation=dilation, 61 | bias=False if use_gn else True 62 | ) 63 | if kaiming_init: 64 | nn.init.kaiming_normal_( 65 | conv.weight, mode="fan_out", nonlinearity="relu" 66 | ) 67 | else: 68 | torch.nn.init.normal_(conv.weight, std=0.01) 69 | if not use_gn: 70 | nn.init.constant_(conv.bias, 0) 71 | module = [conv,] 72 | if use_gn: 73 | module.append(group_norm(out_channels)) 74 | if use_relu: 75 | module.append(nn.ReLU(inplace=True)) 76 | if len(module) > 1: 77 | return nn.Sequential(*module) 78 | return conv 79 | 80 | def make_dfconv3x3( 81 | in_channels, 82 | out_channels, 83 | dilation=1, 84 | stride=1, 85 | use_gn=False, 86 | use_relu=False, 87 | kaiming_init=True 88 | ): 89 | conv = DFConv2d( 90 | in_channels, 91 | out_channels, 92 | kernel_size=3, 93 | stride=stride, 94 | dilation=dilation, 95 | bias=False if use_gn else True 96 | ) 97 | module = [conv,] 98 | if use_gn: 99 | module.append(group_norm(out_channels)) 100 | if use_relu: 101 | module.append(nn.ReLU(inplace=True)) 102 | if len(module) > 1: 103 | return nn.Sequential(*module) 104 | return conv 105 | 106 | 107 | def make_fc(dim_in, hidden_dim, use_gn): 108 | ''' 109 | Caffe2 implementation uses XavierFill, which in fact 110 | corresponds to kaiming_uniform_ in PyTorch 111 | ''' 112 | if use_gn: 113 | fc = nn.Linear(dim_in, hidden_dim, bias=False) 114 | nn.init.kaiming_uniform_(fc.weight, a=1) 115 | return nn.Sequential(fc, group_norm(hidden_dim)) 116 | fc = nn.Linear(dim_in, hidden_dim) 117 | nn.init.kaiming_uniform_(fc.weight, a=1) 118 | nn.init.constant_(fc.bias, 0) 119 | return fc 120 | 121 | 122 | def conv_with_kaiming_uniform(use_gn=False, use_relu=False): 123 | def make_conv( 124 | in_channels, out_channels, kernel_size, stride=1, dilation=1 125 | ): 126 | conv = Conv2d( 127 | in_channels, 128 | out_channels, 129 | kernel_size=kernel_size, 130 | stride=stride, 131 | padding=dilation * (kernel_size - 1) // 2, 132 | dilation=dilation, 133 | bias=False if use_gn else True 134 | ) 135 | # Caffe2 implementation uses XavierFill, which in fact 136 | # corresponds to kaiming_uniform_ in PyTorch 137 | nn.init.kaiming_uniform_(conv.weight, a=1) 138 | if not use_gn: 139 | nn.init.constant_(conv.bias, 0) 140 | module = [conv,] 141 | if use_gn: 142 | module.append(group_norm(out_channels)) 143 | if use_relu: 144 | module.append(nn.ReLU(inplace=True)) 145 | if len(module) > 1: 146 | return nn.Sequential(*module) 147 | return conv 148 | 149 | return make_conv 150 | 
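# A minimal usage sketch for the helpers above (illustrative only: the channel
# sizes are made up, and use_gn is left off so the global cfg's GroupNorm
# settings are never consulted; assumes the package is built and importable).
if __name__ == "__main__":
    block = make_conv3x3(256, 256, use_relu=True)  # Sequential(Conv2d, ReLU)
    x = torch.randn(2, 256, 32, 32)
    y = block(x)  # -> (2, 256, 32, 32): stride=1 and padding=dilation preserve the size

    make_conv = conv_with_kaiming_uniform()  # the factory the FPN receives as its conv_block
    lateral = make_conv(256, 256, 1)  # a 1x1 lateral convolution, bias initialized to 0
    fc = make_fc(256 * 7 * 7, 1024, use_gn=False)  # XavierFill-style (kaiming_uniform_) init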
-------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/poolers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | from maskrcnn_benchmark.layers import ROIAlign 7 | 8 | from .utils import cat 9 | 10 | 11 | class LevelMapper(object): 12 | """Determine which FPN level each RoI in a set of RoIs should map to based 13 | on the heuristic in the FPN paper. 14 | """ 15 | 16 | def __init__(self, k_min, k_max, canonical_scale=224, canonical_level=4, eps=1e-6): 17 | """ 18 | Arguments: 19 | k_min (int) 20 | k_max (int) 21 | canonical_scale (int) 22 | canonical_level (int) 23 | eps (float) 24 | """ 25 | self.k_min = k_min 26 | self.k_max = k_max 27 | self.s0 = canonical_scale 28 | self.lvl0 = canonical_level 29 | self.eps = eps 30 | 31 | def __call__(self, boxlists): 32 | """ 33 | Arguments: 34 | boxlists (list[BoxList]) 35 | """ 36 | # Compute level ids 37 | s = torch.sqrt(cat([boxlist.area() for boxlist in boxlists])) 38 | 39 | # Eqn.(1) in FPN paper 40 | target_lvls = torch.floor(self.lvl0 + torch.log2(s / self.s0 + self.eps)) 41 | target_lvls = torch.clamp(target_lvls, min=self.k_min, max=self.k_max) 42 | return target_lvls.to(torch.int64) - self.k_min 43 | 44 | 45 | class Pooler(nn.Module): 46 | """ 47 | Pooler for Detection with or without FPN. 48 | It currently hard-codes ROIAlign in the implementation, 49 | but that can be made more generic later on. 50 | Also, the requirement of passing the scales is not strictly necessary, as they 51 | can be inferred from the size of the feature map / size of original image, 52 | which is available thanks to the BoxList. 53 | """ 54 | 55 | def __init__(self, output_size, scales, sampling_ratio): 56 | """ 57 | Arguments: 58 | output_size (list[tuple[int]] or list[int]): output size for the pooled region 59 | scales (list[float]): scales for each Pooler 60 | sampling_ratio (int): sampling ratio for ROIAlign 61 | """ 62 | super(Pooler, self).__init__() 63 | poolers = [] 64 | for scale in scales: 65 | poolers.append( 66 | ROIAlign( 67 | output_size, spatial_scale=scale, sampling_ratio=sampling_ratio 68 | ) 69 | ) 70 | self.poolers = nn.ModuleList(poolers) 71 | self.output_size = output_size 72 | # get the levels in the feature map by leveraging the fact that the network always 73 | # downsamples by a factor of 2 at each level. 74 | lvl_min = -torch.log2(torch.tensor(scales[0], dtype=torch.float32)).item() 75 | lvl_max = -torch.log2(torch.tensor(scales[-1], dtype=torch.float32)).item() 76 | self.map_levels = LevelMapper(lvl_min, lvl_max) 77 | 78 | def convert_to_roi_format(self, boxes): 79 | concat_boxes = cat([b.bbox for b in boxes], dim=0) 80 | device, dtype = concat_boxes.device, concat_boxes.dtype 81 | ids = cat( 82 | [ 83 | torch.full((len(b), 1), i, dtype=dtype, device=device) 84 | for i, b in enumerate(boxes) 85 | ], 86 | dim=0, 87 | ) 88 | rois = torch.cat([ids, concat_boxes], dim=1) 89 | return rois 90 | 91 | def forward(self, x, boxes): 92 | """ 93 | Arguments: 94 | x (list[Tensor]): feature maps for each level 95 | boxes (list[BoxList]): boxes to be used to perform the pooling operation.
96 | Returns: 97 | result (Tensor) 98 | """ 99 | num_levels = len(self.poolers) 100 | rois = self.convert_to_roi_format(boxes) 101 | if num_levels == 1: 102 | return self.poolers[0](x[0], rois) 103 | 104 | levels = self.map_levels(boxes) 105 | 106 | num_rois = len(rois) 107 | num_channels = x[0].shape[1] 108 | output_size = self.output_size[0] 109 | 110 | dtype, device = x[0].dtype, x[0].device 111 | result = torch.zeros( 112 | (num_rois, num_channels, output_size, output_size), 113 | dtype=dtype, 114 | device=device, 115 | ) 116 | for level, (per_level_feature, pooler) in enumerate(zip(x, self.poolers)): 117 | idx_in_level = torch.nonzero(levels == level).squeeze(1) 118 | rois_per_level = rois[idx_in_level] 119 | result[idx_in_level] = pooler(per_level_feature, rois_per_level) 120 | 121 | return result 122 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from maskrcnn_benchmark.utils.registry import Registry 4 | 5 | BACKBONES = Registry() 6 | ROI_BOX_FEATURE_EXTRACTORS = Registry() 7 | RPN_HEADS = Registry() 8 | SEMANTIC_HEADS = Registry() 9 | UPSAMPLE_MODULES = Registry() 10 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/modeling/roi_heads/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 6 | from .roi_box_predictors import make_roi_box_predictor 7 | from .inference import make_roi_box_post_processor 8 | from .loss import make_roi_box_loss_evaluator 9 | 10 | class ROIBoxHead(torch.nn.Module): 11 | """ 12 | Generic Box Head class. 13 | """ 14 | 15 | def __init__(self, cfg): 16 | super(ROIBoxHead, self).__init__() 17 | self.feature_extractor = make_roi_box_feature_extractor(cfg) 18 | self.predictor = make_roi_box_predictor(cfg) 19 | self.post_processor = make_roi_box_post_processor(cfg) 20 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 21 | 22 | def forward(self, features, proposals, targets=None): 23 | """ 24 | Arguments: 25 | features (list[Tensor]): feature-maps from possibly several levels 26 | proposals (list[BoxList]): proposal boxes 27 | targets (list[BoxList], optional): the ground-truth targets. 28 | 29 | Returns: 30 | x (Tensor): the result of the feature extractor 31 | proposals (list[BoxList]): during training, the subsampled proposals 32 | are returned. 
During testing, the predicted boxlists are returned 33 | losses (dict[Tensor]): During training, returns the losses for the 34 | head. During testing, returns an empty dict. 35 | """ 36 | 37 | if self.training: 38 | # Faster R-CNN subsamples during training the proposals with a fixed 39 | # positive / negative ratio 40 | with torch.no_grad(): 41 | proposals = self.loss_evaluator.subsample(proposals, targets) 42 | 43 | # extract features that will be fed to the final classifier. The 44 | # feature_extractor generally corresponds to the pooler + heads 45 | x = self.feature_extractor(features, proposals) 46 | # final classifier that converts the features into predictions 47 | class_logits, box_regression = self.predictor(x) 48 | 49 | if not self.training: 50 | result = self.post_processor((class_logits, box_regression), proposals) 51 | return x, result, {} 52 | 53 | loss_classifier, loss_box_reg = self.loss_evaluator( 54 | [class_logits], [box_regression] 55 | ) 56 | return ( 57 | x, 58 | proposals, 59 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 60 | ) 61 | 62 | 63 | def build_roi_box_head(cfg): 64 | """ 65 | Constructs a new box head. 66 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 67 | and make it a parameter in the config 68 | """ 69 | return ROIBoxHead(cfg) 70 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | 4 | 5 | class FastRCNNPredictor(nn.Module): 6 | def __init__(self, config, pretrained=None): 7 | super(FastRCNNPredictor, self).__init__() 8 | 9 | stage_index = 4 10 | stage2_relative_factor = 2 ** (stage_index - 1) 11 | res2_out_channels = config.MODEL.RESNETS.RES2_OUT_CHANNELS 12 | num_inputs = res2_out_channels * stage2_relative_factor 13 | 14 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7) 16 | self.cls_score = nn.Linear(num_inputs, num_classes) 17 | self.bbox_pred = nn.Linear(num_inputs, num_classes * 4) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | cls_logit = self.cls_score(x) 29 | bbox_pred = self.bbox_pred(x) 30 | return cls_logit, bbox_pred 31 | 32 | 33 | class FPNPredictor(nn.Module): 34 | def __init__(self, cfg): 35 | super(FPNPredictor, self).__init__() 36 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 37 | representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM 38 | 39 | self.cls_score = nn.Linear(representation_size, num_classes) 40 | self.bbox_pred = nn.Linear(representation_size, num_classes * 4) 41 | 42 | nn.init.normal_(self.cls_score.weight, std=0.01) 43 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 44 | for l in [self.cls_score, self.bbox_pred]: 45 | nn.init.constant_(l.bias, 0) 46 | 47 | def forward(self, x): 48 | scores = self.cls_score(x) 49 | bbox_deltas = self.bbox_pred(x) 50 | 51 | return scores, bbox_deltas 52 | 53 | 54 | _ROI_BOX_PREDICTOR = { 55 | "FastRCNNPredictor": FastRCNNPredictor, 56 | "FPNPredictor": FPNPredictor, 57 | } 58 | 59 | 60 | def 
make_roi_box_predictor(cfg): 61 | func = _ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 62 | return func(cfg) 63 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | 7 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 8 | from .roi_mask_predictors import make_roi_mask_predictor 9 | from .inference import make_roi_mask_post_processor 10 | from .loss import make_roi_mask_loss_evaluator 11 | 12 | import pdb 13 | 14 | def keep_only_positive_boxes(boxes): 15 | """ 16 | Given a set of BoxList containing the `labels` field, 17 | return a set of BoxList for which `labels > 0`. 18 | 19 | Arguments: 20 | boxes (list of BoxList) 21 | """ 22 | assert isinstance(boxes, (list, tuple)) 23 | assert isinstance(boxes[0], BoxList) 24 | assert boxes[0].has_field("labels") 25 | positive_boxes = [] 26 | positive_inds = [] 27 | num_boxes = 0 28 | for boxes_per_image in boxes: 29 | labels = boxes_per_image.get_field("labels") 30 | inds_mask = labels > 0 31 | inds = inds_mask.nonzero().squeeze(1) 32 | positive_boxes.append(boxes_per_image[inds]) 33 | positive_inds.append(inds_mask) 34 | return positive_boxes, positive_inds 35 | 36 | 37 | class ROIMaskHead(torch.nn.Module): 38 | def __init__(self, cfg): 39 | super(ROIMaskHead, self).__init__() 40 | self.cfg = cfg.clone() 41 | self.feature_extractor = make_roi_mask_feature_extractor(cfg) 42 | self.predictor = make_roi_mask_predictor(cfg) 43 | self.post_processor = make_roi_mask_post_processor(cfg) 44 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 45 | 46 | def forward(self, features, proposals, targets=None): 47 | """ 48 | Arguments: 49 | features (list[Tensor]): feature-maps from possibly several levels 50 | proposals (list[BoxList]): proposal boxes 51 | targets (list[BoxList], optional): the ground-truth targets. 52 | 53 | Returns: 54 | x (Tensor): the result of the feature extractor 55 | proposals (list[BoxList]): during training, the original proposals 56 | are returned. During testing, the predicted boxlists are returned 57 | with the `mask` field set 58 | losses (dict[Tensor]): During training, returns the losses for the 59 | head. During testing, returns an empty dict. 60 | """ 61 | 62 | if self.training: 63 | # during training, only focus on positive boxes 64 | all_proposals = proposals 65 | proposals, positive_inds = keep_only_positive_boxes(proposals) 66 | 67 | # len(proposals) has usually quite dwindled down by now. 
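        # when SHARE_BOX_FEATURE_EXTRACTOR is enabled, `features` are the per-ROI
        # features already computed by the box head, so the positive proposals can
        # simply be indexed out; otherwise the mask head pools its own features
        # from the FPN maps via its feature extractor.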
68 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 69 | x = features 70 | x = x[torch.cat(positive_inds, dim=0)] 71 | else: 72 | x = self.feature_extractor(features, proposals) 73 | 74 | mask_logits, mask_scores = self.predictor(x) 75 | 76 | if not self.training: 77 | result = self.post_processor(mask_logits, proposals, mask_scores) 78 | return x, result, {}, mask_logits, mask_scores 79 | 80 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 81 | 82 | return x, all_proposals, dict(loss_mask=loss_mask), mask_logits, mask_scores 83 | 84 | 85 | def build_roi_mask_head(cfg): 86 | return ROIMaskHead(cfg) 87 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 6 | from maskrcnn_benchmark.modeling.poolers import Pooler 7 | from maskrcnn_benchmark.layers import Conv2d 8 | from maskrcnn_benchmark.modeling.make_layers import make_conv3x3 9 | 10 | 11 | 12 | class MaskRCNNFPNFeatureExtractor(nn.Module): 13 | """ 14 | Feature extractor for the FPN-based mask head 15 | """ 16 | 17 | def __init__(self, cfg): 18 | """ 19 | Arguments: 20 | cfg: the global configuration; the pooler resolution, scales, sampling 21 | ratio and the conv tower layout are read from 22 | cfg.MODEL.ROI_MASK_HEAD 23 | """ 24 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 25 | 26 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 27 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 28 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 29 | pooler = Pooler( 30 | output_size=(resolution, resolution), 31 | scales=scales, 32 | sampling_ratio=sampling_ratio, 33 | ) 34 | input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS 35 | self.pooler = pooler 36 | 37 | use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN 38 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 39 | dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION 40 | 41 | next_feature = input_size 42 | self.blocks = [] 43 | for layer_idx, layer_features in enumerate(layers, 1): 44 | layer_name = "mask_fcn{}".format(layer_idx) 45 | module = make_conv3x3(next_feature, layer_features, 46 | dilation=dilation, stride=1, use_gn=use_gn 47 | ) 48 | self.add_module(layer_name, module) 49 | next_feature = layer_features 50 | self.blocks.append(layer_name) 51 | 52 | def forward(self, x, proposals): 53 | x = self.pooler(x, proposals) 54 | 55 | for layer_name in self.blocks: 56 | x = F.relu(getattr(self, layer_name)(x)) 57 | 58 | return x 59 | 60 | 61 | _ROI_MASK_FEATURE_EXTRACTORS = { 62 | "ResNet50Conv5ROIFeatureExtractor": ResNet50Conv5ROIFeatureExtractor, 63 | "MaskRCNNFPNFeatureExtractor": MaskRCNNFPNFeatureExtractor, 64 | } 65 | 66 | 67 | def make_roi_mask_feature_extractor(cfg): 68 | func = _ROI_MASK_FEATURE_EXTRACTORS[cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR] 69 | return func(cfg) 70 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
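# Two mask predictors are provided below: MaskRCNNC4Predictor emits only mask
# logits (its second return value is None), while MaskRCNNC4WithScorePredictor
# additionally regresses a per-class mask score that the mask head passes to
# its post-processor alongside the logits.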
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | from maskrcnn_benchmark.layers import ConvTranspose2d 7 | 8 | import pdb 9 | 10 | 11 | class MaskRCNNC4Predictor(nn.Module): 12 | def __init__(self, cfg): 13 | super(MaskRCNNC4Predictor, self).__init__() 14 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 16 | 17 | if cfg.MODEL.ROI_HEADS.USE_FPN: 18 | num_inputs = dim_reduced 19 | else: 20 | stage_index = 4 21 | stage2_relative_factor = 2 ** (stage_index - 1) 22 | res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 23 | num_inputs = res2_out_channels * stage2_relative_factor 24 | 25 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 26 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 27 | 28 | for name, param in self.named_parameters(): 29 | if "bias" in name: 30 | nn.init.constant_(param, 0) 31 | elif "weight" in name: 32 | # Caffe2 implementation uses MSRAFill, which in fact 33 | # corresponds to kaiming_normal_ in PyTorch 34 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 35 | 36 | def forward(self, x): 37 | x = F.relu(self.conv5_mask(x)) 38 | 39 | return self.mask_fcn_logits(x), None 40 | 41 | class WithScorePredictor(nn.Module): 42 | def __init__(self, cfg): 43 | super(WithScorePredictor, self).__init__() 44 | 45 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 46 | num_inputs = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 47 | 48 | # do we need more parameters? 49 | self.score_avgpool = nn.AvgPool2d(kernel_size=14, stride=14) 50 | self.mask_score = nn.Linear(num_inputs, num_classes) 51 | 52 | nn.init.normal_(self.mask_score.weight, mean=0, std=0.01) 53 | nn.init.constant_(self.mask_score.bias, 0) 54 | 55 | def forward(self, x): 56 | pool = self.score_avgpool(x) 57 | flat = pool.view(pool.size(0), -1) 58 | return self.mask_score(flat) 59 | 60 | class MaskRCNNC4WithScorePredictor(nn.Module): 61 | def __init__(self, cfg): 62 | super(MaskRCNNC4WithScorePredictor, self).__init__() 63 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 64 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 65 | 66 | if cfg.MODEL.ROI_HEADS.USE_FPN: 67 | num_inputs = dim_reduced 68 | else: 69 | stage_index = 4 70 | stage2_relative_factor = 2 ** (stage_index - 1) 71 | res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 72 | num_inputs = res2_out_channels * stage2_relative_factor 73 | 74 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 75 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 76 | self.score_predictor = WithScorePredictor(cfg) 77 | 78 | for name, param in self.named_parameters(): 79 | if "bias" in name: 80 | nn.init.constant_(param, 0) 81 | elif "weight" in name: 82 | # Caffe2 implementation uses MSRAFill, which in fact 83 | # corresponds to kaiming_normal_ in PyTorch 84 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 85 | 86 | def forward(self, x): 87 | y = self.score_predictor(x) 88 | x = F.relu(self.conv5_mask(x)) 89 | 90 | return self.mask_fcn_logits(x), y 91 | 92 | _ROI_MASK_PREDICTOR = {"MaskRCNNC4Predictor": MaskRCNNC4Predictor, "MaskRCNNC4WithScorePredictor": MaskRCNNC4WithScorePredictor} 93 | 94 | def make_roi_mask_predictor(cfg): 95 | func = _ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 96 | return func(cfg) 97 | -------------------------------------------------------------------------------- 
/maskrcnn_benchmark/modeling/roi_heads/order_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/modeling/roi_heads/order_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/order_head/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import pycocotools.mask as mask_util 4 | import torch 5 | 6 | import pdb 7 | 8 | def prepare_mask_intersection_matrix(boxes, hard_masks): 9 | import pycocotools.mask as mask_util 10 | 11 | rles = [ 12 | mask_util.encode(np.array(hard_mask[0, :, :, np.newaxis], order="F"))[0] 13 | for hard_mask in hard_masks 14 | ] 15 | 16 | iscrowd = [] 17 | for rle in rles: 18 | rle["counts"] = rle["counts"].decode("utf-8") 19 | iscrowd.append(0) 20 | 21 | # quickly find those that intersect at all. 22 | iou = mask_util.iou(rles, rles, iscrowd) 23 | number_masks = len(rles) 24 | 25 | # zeros prevents diagonals from ever being considered. 26 | intersect_ratio = np.zeros_like(iou, dtype=np.float32) 27 | 28 | # note, these masks are variable size. unsure whether to do this altogether. 29 | #intersect_bbox = np.zeros((number_masks, number_masks, 4), dtype=np.int32) 30 | 31 | for from_index in range(intersect_ratio.shape[0]): 32 | from_rle = rles[from_index] 33 | from_area = mask_util.area(from_rle) 34 | 35 | # only need the upper triangle. 36 | for to_index in range(from_index + 1, intersect_ratio.shape[1]): 37 | to_rle = rles[to_index] 38 | to_area = mask_util.area(to_rle) 39 | 40 | # this is symmetric. 41 | merged = mask_util.merge([from_rle, to_rle], intersect=True) 42 | 43 | # do we compute the bbox here or wait for later? add some padding? 44 | #merged_bbox = mask_util.toBbox(merged) 45 | #intersect_bbox[from_index, to_index] = merged_bbox[0] 46 | #intersect_bbox[to_index, from_index] = merged_bbox[0] 47 | 48 | from_to_intersect = float(mask_util.area(merged)) 49 | 50 | from_ratio = from_to_intersect / (from_area + 0.0001) 51 | to_ratio = from_to_intersect / (to_area + 0.0001) 52 | 53 | # from -> to. 54 | intersect_ratio[from_index, to_index] = from_ratio 55 | intersect_ratio[to_index, from_index] = to_ratio 56 | 57 | intersect_ratio = torch.tensor(intersect_ratio).to(boxes.bbox.device) 58 | #intersect_bbox = torch.tensor(intersect_bbox).to(boxes.bbox.device) 59 | 60 | return intersect_ratio#, intersect_bbox 61 | 62 | def filter_actual_overlaps(target, matching, proposal_pairs): 63 | if not target.has_field("overlaps"): 64 | raise ValueError("overlaps do not exist on target") 65 | 66 | # from the "overlap" ground truth, we can infer the ground truth class that survives the overlap. 
67 | overlaps = target.get_field("overlaps") 68 | 69 | first_idxs = torch.unsqueeze(matching[proposal_pairs[:, 0]], dim=1) 70 | second_idxs = torch.unsqueeze(matching[proposal_pairs[:, 1]], dim=1) 71 | 72 | selected_overlaps = overlaps[first_idxs, second_idxs] 73 | 74 | actual_overlaps = selected_overlaps >= 0 75 | mask_of_overlaps = torch.nonzero(actual_overlaps)[:, 0] 76 | 77 | return mask_of_overlaps, selected_overlaps 78 | 79 | def subsample_actual_overlaps(mask_of_overlaps, maximum_per_image=None): 80 | if maximum_per_image is None: 81 | return mask_of_overlaps 82 | 83 | number_masked = mask_of_overlaps.shape[0] 84 | subsample_size = min(maximum_per_image, number_masked) 85 | subsample_perm = torch.randperm(number_masked, device=mask_of_overlaps.device)[:subsample_size] 86 | 87 | return mask_of_overlaps[subsample_perm] 88 | 89 | def compute_overlap_matrix(target): 90 | width, height = target.size 91 | masks = target.get_field("masks") 92 | 93 | # decode each instance to a binary mask so the call below matches the 94 | # (boxes, hard_masks) signature of prepare_mask_intersection_matrix above; 95 | # the previous call passed raw RLEs and an extra argument. 96 | hard_masks = [] 97 | for poly in masks.polygons: 98 | encoded = mask_util.frPyObjects(poly.polygons, height, width) 99 | # merge the polygon pieces of one instance (encoded[0] kept only the first piece). 100 | hard_masks.append(mask_util.decode(mask_util.merge(encoded))[np.newaxis, :, :]) 101 | 102 | return prepare_mask_intersection_matrix(target, hard_masks) 103 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/order_head/roi_order_feature_extractors.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | 7 | class OrderHeadFeatureExtractor(nn.Module): 8 | def __init__(self, cfg): 9 | super(OrderHeadFeatureExtractor, self).__init__() 10 | 11 | self.cfg = cfg.clone() 12 | 13 | # paired. 14 | input_channels = self.cfg.MODEL.ROI_ORDER_HEAD.NUMBER_INPUT_CHANNELS 15 | number_channels = 512 16 | 17 | self.conv1 = Conv2d(input_channels, number_channels, 3, 1, 1) 18 | self.conv2 = Conv2d(number_channels, number_channels, 3, 1, 1) 19 | self.conv3 = Conv2d(number_channels, number_channels, 3, 1, 1) 20 | 21 | # stride 2.
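# assuming the paired ROI features enter at 14x14, this stride-2 conv halves them to
# 7x7 so the flattened size matches the 512 * 7 * 7 input of the order predictors' fc1.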
22 | self.conv4 = Conv2d(number_channels, number_channels, 3, 2, 1) 23 | 24 | for l in [self.conv1, self.conv2, self.conv3, self.conv4]: 25 | nn.init.kaiming_normal_(l.weight, mode="fan_out", nonlinearity="relu") 26 | nn.init.constant_(l.bias, 0) 27 | 28 | def forward(self, x): 29 | x = F.relu(self.conv1(x)) 30 | x = F.relu(self.conv2(x)) 31 | x = F.relu(self.conv3(x)) 32 | x = F.relu(self.conv4(x)) 33 | 34 | return x 35 | 36 | def make_roi_order_feature_extractor(cfg): 37 | return OrderHeadFeatureExtractor(cfg) 38 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/order_head/roi_order_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from maskrcnn_benchmark.layers import Conv2d 5 | 6 | class OrderPredictor(nn.Module): 7 | def __init__(self, cfg): 8 | super(OrderPredictor, self).__init__() 9 | 10 | self.fc1 = nn.Linear(512 * 7 * 7, 1024) 11 | self.project = nn.Linear(1024, 1) 12 | 13 | nn.init.kaiming_uniform_(self.fc1.weight, a=1) 14 | nn.init.constant_(self.fc1.bias, 0) 15 | 16 | nn.init.normal_(self.project.weight, mean=0, std=0.01) 17 | nn.init.constant_(self.project.bias, 0) 18 | 19 | def forward(self, x): 20 | x = x.view(x.size(0), -1) 21 | x = F.relu(self.fc1(x)) 22 | x = self.project(x) 23 | 24 | return x 25 | 26 | class OrderPredictorThing(nn.Module): 27 | def __init__(self, cfg): 28 | super(OrderPredictorThing, self).__init__() 29 | 30 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 31 | 32 | # not sure about this one. 33 | self.fc1 = nn.Linear(512 * 7 * 7, 1024) 34 | self.project = nn.Linear(1024, num_classes) 35 | 36 | nn.init.kaiming_uniform_(self.fc1.weight, a=1) 37 | nn.init.constant_(self.fc1.bias, 0) 38 | 39 | nn.init.normal_(self.project.weight, mean=0, std=0.01) 40 | nn.init.constant_(self.project.bias, 0) 41 | 42 | def forward(self, x): 43 | x = x.view(x.size(0), -1) 44 | x = F.relu(self.fc1(x)) 45 | x = self.project(x) 46 | 47 | return x 48 | 49 | _ROI_ORDER_PREDICTOR = { 50 | "OrderPredictor": OrderPredictor, 51 | "OrderPredictorThing": OrderPredictorThing 52 | } 53 | 54 | def make_roi_order_predictor(cfg): 55 | func = _ROI_ORDER_PREDICTOR["OrderPredictor"] 56 | return func(cfg) 57 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/roi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .box_head.box_head import build_roi_box_head 5 | from .mask_head.mask_head import build_roi_mask_head 6 | from .order_head.order_head import build_roi_order_head 7 | 8 | import numpy as np 9 | import pdb 10 | 11 | class CombinedROIHeads(torch.nn.ModuleDict): 12 | """ 13 | Combines a set of individual heads (for box prediction or masks) into a single 14 | head. 
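Heads are stored by name ("box", "mask", "order") in the underlying ModuleDict
and run in sequence by forward(), each consuming the detections of the previous one.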
15 | """ 16 | 17 | def __init__(self, cfg, heads): 18 | super(CombinedROIHeads, self).__init__(heads) 19 | self.cfg = cfg.clone() 20 | if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 21 | self.mask.feature_extractor = self.box.feature_extractor 22 | 23 | self.feed_ground_truth_instances = cfg.TEST.FEED_GROUND_TRUTH_INSTANCES 24 | 25 | def forward(self, features, proposals, targets=None, semantic_targets=None): 26 | losses = {} 27 | mask_logits = None 28 | 29 | # TODO rename x to roi_box_features, if it doesn't increase memory consumption 30 | x, detections, loss_box = self.box(features, proposals, targets) 31 | losses.update(loss_box) 32 | 33 | # replace the detections once again. 34 | if self.feed_ground_truth_instances: 35 | detections = proposals 36 | 37 | if self.cfg.MODEL.MASK_ON: 38 | mask_features = features 39 | # optimization: during training, if we share the feature extractor between 40 | # the box and the mask heads, then we can reuse the features already computed 41 | if ( 42 | self.training 43 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 44 | ): 45 | mask_features = x 46 | 47 | # During training, self.box() will return the unaltered proposals as "detections" 48 | # this makes the API consistent during training and testing 49 | x, detections, loss_mask, mask_logits, mask_scores = self.mask(mask_features, detections, targets) 50 | losses.update(loss_mask) 51 | 52 | if self.cfg.MODEL.ORDER_ON: 53 | # mask head does this in a nicer way by "sharing" the feature extractor and 54 | # opting not to compute it again at training time. TODO! 55 | order_features = features 56 | if self.cfg.MODEL.ROI_ORDER_HEAD.SHARE_MASK_FEATURE_EXTRACTOR: 57 | order_features = x 58 | 59 | loss_order = self.order(order_features, detections, mask_logits, targets) 60 | losses.update(loss_order) 61 | 62 | return x, detections, losses 63 | 64 | def build_roi_heads(cfg): 65 | # individually create the heads, that will be combined together 66 | # afterwards 67 | roi_heads = [] 68 | if not cfg.MODEL.RPN_ONLY: 69 | roi_heads.append(("box", build_roi_box_head(cfg))) 70 | if cfg.MODEL.MASK_ON: 71 | roi_heads.append(("mask", build_roi_mask_head(cfg))) 72 | if cfg.MODEL.ORDER_ON: 73 | roi_heads.append(("order", build_roi_order_head(cfg))) 74 | 75 | # combine individual heads in a single module 76 | if roi_heads: 77 | roi_heads = CombinedROIHeads(cfg, roi_heads) 78 | 79 | return roi_heads 80 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/segmenter/__init__.py: -------------------------------------------------------------------------------- 1 | from .segmenters import build_segmentation_model 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/segmenter/segmenters.py: -------------------------------------------------------------------------------- 1 | from .semantic_fpn import SemanticFPN 2 | 3 | _SEGMENTATION_META_ARCHITECTURES = { 4 | "SemanticFPN": SemanticFPN 5 | } 6 | 7 | def build_segmentation_model(cfg): 8 | meta_arch = _SEGMENTATION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 9 | return meta_arch(cfg) 10 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/segmenter/semantic_fpn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Semantic FPN 3 | """ 4 | 5 | import torch 6 | from torch import nn 7 | 8 | from maskrcnn_benchmark.structures.image_list import to_image_list 9 | 10 | from ..backbone import build_backbone 11 | from ..semantic.segmentation import build_semantic_segmentation 12 | 13 | import pdb 14 | 15 | class SemanticFPN(nn.Module): 16 | """ 17 | Main class for Semantic FPN. Takes a semantic target. 18 | It consists of two main parts: a backbone and a 19 | semantic segmentation head. 20 | """ 21 | 22 | def __init__(self, cfg): 23 | super(SemanticFPN, self).__init__() 24 | 25 | self.backbone = build_backbone(cfg) 26 | self.semantic_segmentation = build_semantic_segmentation(cfg) 27 | 28 | def forward(self, images, targets=None): 29 | """ 30 | Arguments: 31 | images (list[Tensor] or ImageList): images to be processed 32 | targets (list[SemanticSegmentation]): ground-truth segmentation in the image (optional) 33 | 34 | Returns: 35 | result (list[Tensor] or dict[Tensor]): the output from the model. 36 | During training, it returns a dict[Tensor] which contains the losses. 37 | During testing, it returns a list[Tensor] containing the segmentation images. 38 | 39 | """ 40 | if self.training and targets is None: 41 | raise ValueError("In training mode, targets should be passed") 42 | 43 | # usually, it seems this is already an ImageList. 44 | images = to_image_list(images) 45 | 46 | # note that these are already run through FPN if FPN is included.
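# with an FPN backbone, features is a tuple of per-level maps rather than a
# single tensor; the semantic head is expected to fuse these levels itself.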
47 | features = self.backbone(images.tensors) 48 | 49 | semantic_targets = [f.segmentation for f in targets] if targets is not None else None # guard inference, where no targets are given. 50 | mask, semantic_losses = self.semantic_segmentation(images, features, semantic_targets) 51 | 52 | if self.training: 53 | losses = {} 54 | losses.update(semantic_losses) 55 | return losses 56 | 57 | return mask 58 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/semantic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/modeling/semantic/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/semantic/loss.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains specific functions for computing losses from 3 | semantic segmentation 4 | """ 5 | 6 | import torch 7 | from torch.nn import functional as F 8 | 9 | from maskrcnn_benchmark.modeling.utils import cat 10 | import pdb 11 | 12 | class SemanticLossComputation(object): 13 | """ 14 | This class computes the semantic segmentation loss. 15 | """ 16 | 17 | def __init__(self): 18 | pass 19 | 20 | def __call__(self, class_logits, targets): 21 | """ 22 | Arguments: 23 | class_logits (Tensor) 24 | targets (list[SemanticSegmentation]) 25 | 26 | Returns: 27 | semantic_loss (Tensor) 28 | """ 29 | 30 | # "class_logits" may carry right/bottom zero padding from batching. pad 31 | # the targets to match and mark the padded pixels as unlabeled on a 32 | # per-image basis. 33 | labels = [target.mask for target in targets] 34 | has_label_mask = [target.has_label_mask for target in targets] 35 | 36 | # todo, easier way? 37 | for i in range(len(targets)): 38 | given_shape = targets[i].mask.shape 39 | padded_shape = class_logits[i].shape[1:] 40 | extra_bottom = padded_shape[0] - given_shape[0] 41 | extra_right = padded_shape[1] - given_shape[1] 42 | 43 | labels[i] = torch.unsqueeze(F.pad(labels[i], (0, extra_right, 0, extra_bottom), value=0), dim=0) 44 | has_label_mask[i] = torch.unsqueeze(F.pad(has_label_mask[i], (0, extra_right, 0, extra_bottom), value=0), dim=0) 45 | 46 | labels = cat(labels, dim=0).long() 47 | has_label_mask = cat(has_label_mask, dim=0).float() 48 | 49 | # this might be interesting to play with when the ontology is dense. 50 | # e.g. do we penalize getting a "stuff" class when a "thing" is present in a different 51 | # manner to getting a _wrong_ "thing" class? 52 | classification_loss = F.cross_entropy(class_logits, labels, reduction="none") 53 | 54 | # multiply by the "has label" mask 55 | masked_classification_loss = has_label_mask * classification_loss 56 | mean_classification_loss = torch.mean(masked_classification_loss) 57 | 58 | return mean_classification_loss 59 | 60 | def make_semantic_loss_evaluator(cfg): 61 | loss_evaluator = SemanticLossComputation() 62 | return loss_evaluator 63 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_optimizer 3 | from .build import make_lr_scheduler 4 | from .lr_scheduler import WarmupMultiStepLR 5 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import pdb 4 | 5 | from .lr_scheduler import WarmupMultiStepLR 6 | 7 | 8 | def make_optimizer(cfg, model): 9 | params = [] 10 | 11 | order_only = cfg.MODEL.ORDER_ON and cfg.MODEL.ROI_ORDER_HEAD.ONLY_TRAIN 12 | for key, value in model.named_parameters(): 13 | if order_only and not ("roi_heads.order" in key): 14 | print("turning off {0} due to order head only".format(key)) 15 | value.requires_grad = False 16 | 17 | if not value.requires_grad: 18 | continue 19 | lr = cfg.SOLVER.BASE_LR 20 | 21 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 22 | if "bias" in key: 23 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 24 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 25 | 26 | add_params = { 27 | "params": [value], 28 | "lr": lr, 29 | "weight_decay": weight_decay 30 | } 31 | 32 | # FIX. 33 | if cfg.SOLVER.RESUME_ITER > 0: 34 | add_params["initial_lr"] = cfg.SOLVER.BASE_LR 35 | 36 | params += [add_params] 37 | 38 | optimizer = torch.optim.SGD(params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM) 39 | return optimizer 40 | 41 | def make_lr_scheduler(cfg, optimizer): 42 | last_epoch = -1 43 | if cfg.SOLVER.RESUME_ITER > 0: 44 | last_epoch = cfg.SOLVER.RESUME_ITER 45 | 46 | return WarmupMultiStepLR( 47 | optimizer, 48 | cfg.SOLVER.STEPS, 49 | cfg.SOLVER.GAMMA, 50 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 51 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 52 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 53 | last_epoch=last_epoch 54 | ) 55 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | 4 | import torch 5 | 6 | 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 8 | # separating MultiStepLR with WarmupLR 9 | # but the current LRScheduler design doesn't allow it 10 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer, 14 | milestones, 15 | gamma=0.1, 16 | warmup_factor=1.0 / 3, 17 | warmup_iters=500, 18 | warmup_method="linear", 19 | last_epoch=-1, 20 | ): 21 | if not list(milestones) == sorted(milestones): 22 | raise ValueError( 23 | "Milestones should be a list of" " increasing integers. 
Got {}", 24 | milestones, 25 | ) 26 | 27 | if warmup_method not in ("constant", "linear"): 28 | raise ValueError( 29 | "Only 'constant' or 'linear' warmup_method accepted" 30 | "got {}".format(warmup_method) 31 | ) 32 | self.milestones = milestones 33 | self.gamma = gamma 34 | self.warmup_factor = warmup_factor 35 | self.warmup_iters = warmup_iters 36 | self.warmup_method = warmup_method 37 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 38 | 39 | def get_lr(self): 40 | warmup_factor = 1 41 | if self.last_epoch < self.warmup_iters: 42 | if self.warmup_method == "constant": 43 | warmup_factor = self.warmup_factor 44 | elif self.warmup_method == "linear": 45 | alpha = float(self.last_epoch) / self.warmup_iters 46 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 47 | return [ 48 | base_lr 49 | * warmup_factor 50 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 51 | for base_lr in self.base_lrs 52 | ] 53 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/structures/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/boxlist_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .bounding_box import BoxList 5 | 6 | from maskrcnn_benchmark.layers import nms as _box_nms 7 | 8 | 9 | def boxlist_nms(boxlist, nms_thresh, max_proposals=-1, score_field="score"): 10 | """ 11 | Performs non-maximum suppression on a boxlist, with scores specified 12 | in a boxlist field via score_field. 13 | 14 | Arguments: 15 | boxlist(BoxList) 16 | nms_thresh (float) 17 | max_proposals (int): if > 0, then only the top max_proposals are kept 18 | after non-maxium suppression 19 | score_field (str) 20 | """ 21 | if nms_thresh <= 0: 22 | return boxlist 23 | mode = boxlist.mode 24 | boxlist = boxlist.convert("xyxy") 25 | boxes = boxlist.bbox 26 | score = boxlist.get_field(score_field) 27 | keep = _box_nms(boxes, score, nms_thresh) 28 | if max_proposals > 0: 29 | keep = keep[: max_proposals] 30 | boxlist = boxlist[keep] 31 | return boxlist.convert(mode) 32 | 33 | 34 | def remove_small_boxes(boxlist, min_size): 35 | """ 36 | Only keep boxes with both sides >= min_size 37 | 38 | Arguments: 39 | boxlist (Boxlist) 40 | min_size (int) 41 | """ 42 | # TODO maybe add an API for querying the ws / hs 43 | xywh_boxes = boxlist.convert("xywh").bbox 44 | _, _, ws, hs = xywh_boxes.unbind(dim=1) 45 | keep = ( 46 | (ws >= min_size) & (hs >= min_size) 47 | ).nonzero().squeeze(1) 48 | return boxlist[keep] 49 | 50 | 51 | # implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py 52 | # with slight modifications 53 | def boxlist_iou(boxlist1, boxlist2): 54 | """Compute the intersection over union of two set of boxes. 55 | The box order must be (xmin, ymin, xmax, ymax). 56 | 57 | Arguments: 58 | box1: (BoxList) bounding boxes, sized [N,4]. 59 | box2: (BoxList) bounding boxes, sized [M,4]. 60 | 61 | Returns: 62 | (tensor) iou, sized [N,M]. 
63 | 64 | Reference: 65 | https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py 66 | """ 67 | if boxlist1.size != boxlist2.size: 68 | raise RuntimeError( 69 | "boxlists should have same image size, got {}, {}".format(boxlist1, boxlist2)) 70 | 71 | N = len(boxlist1) 72 | M = len(boxlist2) 73 | 74 | area1 = boxlist1.area() 75 | area2 = boxlist2.area() 76 | 77 | box1, box2 = boxlist1.bbox, boxlist2.bbox 78 | 79 | lt = torch.max(box1[:, None, :2], box2[:, :2]) # [N,M,2] 80 | rb = torch.min(box1[:, None, 2:], box2[:, 2:]) # [N,M,2] 81 | 82 | TO_REMOVE = 1 83 | 84 | wh = (rb - lt + TO_REMOVE).clamp(min=0) # [N,M,2] 85 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 86 | 87 | iou = inter / (area1[:, None] + area2 - inter) 88 | return iou 89 | 90 | def boxlist_intersect(boxlist1, boxlist2): 91 | """Compute the intersection of two set of boxes. 92 | The box order must be (xmin, ymin, xmax, ymax). 93 | 94 | Arguments: 95 | box1: (BoxList) bounding boxes, sized [N,4]. 96 | box2: (BoxList) bounding boxes, sized [M,4]. 97 | 98 | Returns: 99 | (tensor) inter, sized [N,M]. 100 | 101 | Reference: 102 | https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py 103 | """ 104 | if boxlist1.size != boxlist2.size: 105 | raise RuntimeError( 106 | "boxlists should have same image size, got {}, {}".format(boxlist1, boxlist2)) 107 | 108 | N = len(boxlist1) 109 | M = len(boxlist2) 110 | 111 | box1, box2 = boxlist1.bbox, boxlist2.bbox 112 | 113 | lt = torch.max(box1[:, None, :2], box2[:, :2]) # [N,M,2] 114 | rb = torch.min(box1[:, None, 2:], box2[:, 2:]) # [N,M,2] 115 | 116 | TO_REMOVE = 1 117 | 118 | wh = (rb - lt + TO_REMOVE).clamp(min=0) # [N,M,2] 119 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 120 | 121 | return inter 122 | 123 | # TODO redundant, remove 124 | def _cat(tensors, dim=0): 125 | """ 126 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 127 | """ 128 | assert isinstance(tensors, (list, tuple)) 129 | if len(tensors) == 1: 130 | return tensors[0] 131 | return torch.cat(tensors, dim) 132 | 133 | 134 | def cat_boxlist(bboxes): 135 | """ 136 | Concatenates a list of BoxList (having the same image size) into a 137 | single BoxList 138 | 139 | Arguments: 140 | bboxes (list[BoxList]) 141 | """ 142 | assert isinstance(bboxes, (list, tuple)) 143 | assert all(isinstance(bbox, BoxList) for bbox in bboxes) 144 | 145 | size = bboxes[0].size 146 | assert all(bbox.size == size for bbox in bboxes) 147 | 148 | mode = bboxes[0].mode 149 | assert all(bbox.mode == mode for bbox in bboxes) 150 | 151 | fields = set(bboxes[0].fields()) 152 | assert all(set(bbox.fields()) == fields for bbox in bboxes) 153 | 154 | cat_boxes = BoxList(_cat([bbox.bbox for bbox in bboxes], dim=0), size, mode) 155 | 156 | for field in fields: 157 | data = _cat([bbox.get_field(field) for bbox in bboxes], dim=0) 158 | cat_boxes.add_field(field, data) 159 | 160 | return cat_boxes 161 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 
11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 33 | When tensors is an iterable of Tensors, it pads 34 | the Tensors with zeros so that they have the same 35 | shape 36 | """ 37 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 38 | tensors = [tensors] 39 | 40 | if isinstance(tensors, ImageList): 41 | return tensors 42 | elif isinstance(tensors, torch.Tensor): 43 | # single tensor shape can be inferred 44 | assert tensors.dim() == 4 45 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 46 | return ImageList(tensors, image_sizes) 47 | elif isinstance(tensors, (tuple, list)): 48 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 49 | 50 | # TODO Ideally, just remove this and let the model handle arbitrary 51 | # input sizes 52 | if size_divisible > 0: 53 | import math 54 | 55 | stride = size_divisible 56 | max_size = list(max_size) 57 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 58 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 59 | max_size = tuple(max_size) 60 | 61 | batch_shape = (len(tensors),) + max_size 62 | batched_imgs = tensors[0].new(*batch_shape).zero_() 63 | for img, pad_img in zip(tensors, batched_imgs): 64 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 65 | 66 | image_sizes = [im.shape[-2:] for im in tensors] 67 | 68 | return ImageList(batched_imgs, image_sizes) 69 | else: 70 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 71 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/panoptic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import PIL 3 | import torch 4 | 5 | import pdb 6 | 7 | from PIL import Image 8 | 9 | FLIP_LEFT_RIGHT = 0 10 | FLIP_TOP_BOTTOM = 1 11 | RGB_BASE = 256 12 | 13 | class SemanticSegmentation(object): 14 | # mask is a mask of class IDs, has_label_mask is binary for whether 15 | # the pixel is labeled. 16 | def __init__(self, mask, has_label_mask): 17 | self.mask = mask 18 | self.has_label_mask = has_label_mask 19 | # will Mask RCNN put the previous on the GPU and then call augmentation methods? 20 | 21 | def to(self, device): 22 | return SemanticSegmentation( 23 | mask=self.mask.to(device), 24 | has_label_mask=self.has_label_mask.to(device) if not (self.has_label_mask is None) else None) 25 | 26 | def resize(self, size, *args, **kwargs): 27 | if (self.mask.device.type != 'cpu') and (self.has_label_mask.device.type != 'cpu'): 28 | raise Exception('expected tensors to be on the CPU for resizing') 29 | 30 | # todo, put this all on the GPU/CPU without going back to PIL.
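# NEAREST resampling is essential here: the mask holds categorical class ids, and any
# interpolation that averages neighboring pixels would fabricate labels.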
31 | mask_np = self.mask.numpy().astype(np.uint8) 32 | mask_img = Image.fromarray(mask_np) 33 | resized_mask_img = mask_img.resize(size, resample=PIL.Image.NEAREST) 34 | 35 | if not (self.has_label_mask is None): 36 | has_label_mask_np = self.has_label_mask.numpy().astype(np.uint8) 37 | has_label_mask_img = Image.fromarray(has_label_mask_np) 38 | resized_has_label_mask_img = has_label_mask_img.resize(size, resample=PIL.Image.NEAREST) 39 | resized_has_label_mask = torch.tensor(np.array(resized_has_label_mask_img).astype(np.int32)) 40 | else: 41 | resized_has_label_mask = None 42 | 43 | # seems like this should be consistent, but a bit unsure. 44 | return SemanticSegmentation( 45 | mask=torch.tensor(np.array(resized_mask_img).astype(np.int32)), 46 | has_label_mask=resized_has_label_mask) 47 | 48 | def transpose(self, method): 49 | if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM): 50 | raise NotImplementedError( 51 | "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented" 52 | ) 53 | 54 | flip_dim = [1] if method == FLIP_LEFT_RIGHT else [0] 55 | flipped_mask = torch.flip(self.mask, flip_dim) 56 | # mirror resize(): has_label_mask may be None. 57 | flipped_has_label_mask = torch.flip(self.has_label_mask, flip_dim) if self.has_label_mask is not None else None 58 | 59 | return SemanticSegmentation(mask=flipped_mask, has_label_mask=flipped_has_label_mask) 60 | 61 | def crop(self, box): 62 | cropped_mask = self.mask[box[1]:box[3], box[0]:box[2]] 63 | # mirror resize(): has_label_mask may be None. 64 | cropped_has_label_mask = self.has_label_mask[box[1]:box[3], box[0]:box[2]] if self.has_label_mask is not None else None 65 | 66 | return SemanticSegmentation(mask=cropped_mask, has_label_mask=cropped_has_label_mask) 67 | 68 | class PanopticTarget(object): 69 | # region: Mask RCNN object information 70 | # segmentation: Semantic segmentation class map. 71 | def __init__(self, region, segmentation): 72 | self.region = region 73 | self.segmentation = segmentation 74 | 75 | def to(self, device): 76 | return PanopticTarget( 77 | region=self.region.to(device), 78 | segmentation=self.segmentation.to(device)) 79 | 80 | def clip_to_image(self, remove_empty=True): 81 | # clip the bounding box. 82 | self.region = self.region.clip_to_image(remove_empty=remove_empty) 83 | 84 | # there is nothing to clip for the segmentation itself. 85 | return self 86 | 87 | def resize(self, size, *args, **kwargs): 88 | return PanopticTarget( 89 | region=self.region.resize(size, *args, **kwargs), 90 | segmentation=self.segmentation.resize(size, *args, **kwargs)) 91 | 92 | def transpose(self, method): 93 | return PanopticTarget( 94 | region=self.region.transpose(method), 95 | segmentation=self.segmentation.transpose(method)) 96 | 97 | def crop(self, box): 98 | return PanopticTarget( 99 | region=self.region.crop(box), 100 | segmentation=self.segmentation.crop(box)) 101 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system.
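For example, the `Registry` helper in `utils/registry.py` can back a small config-keyed factory (a sketch; the registry and builder names below are made up):

    from maskrcnn_benchmark.utils.registry import Registry

    BACKBONES = Registry()  # hypothetical registry, for illustration only

    @BACKBONES.register("toy")
    def build_toy_backbone(cfg):
        return ...

    # in real code the key would come from a config value rather than a literal.
    build = BACKBONES["toy"]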
6 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlazarow/learning_instance_occlusion/60f10b4b39dcb0e6c80ebb9146eda24d8dbc08b1/maskrcnn_benchmark/utils/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import PIL 3 | 4 | from torch.utils.collect_env import get_pretty_env_info 5 | 6 | 7 | def get_pil_version(): 8 | return "\n Pillow ({})".format(PIL.__version__) 9 | 10 | 11 | def collect_env_info(): 12 | env_str = get_pretty_env_info() 13 | env_str += get_pil_version() 14 | return env_str 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for cv2 utility functions and maintaining version compatibility 3 | between 3.x and 4.x 4 | """ 5 | import cv2 6 | 7 | 8 | def findContours(*args, **kwargs): 9 | """ 10 | Wraps cv2.findContours to maintain compatiblity between versions 11 | 3 and 4 12 | 13 | Returns: 14 | contours, hierarchy 15 | """ 16 | if cv2.__version__.startswith('4'): 17 | contours, hierarchy = cv2.findContours(*args, **kwargs) 18 | elif cv2.__version__.startswith('3'): 19 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 20 | else: 21 | raise AssertionError( 22 | 'cv2 must be either version 3 or 4 to call this method') 23 | 24 | return contours, hierarchy 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | 4 | from maskrcnn_benchmark.utils.imports import import_file 5 | 6 | 7 | def setup_environment(): 8 | """Perform environment setup work. The default setup is a no-op, but this 9 | function allows the user to specify a Python source file that performs 10 | custom setup work that may be necessary to their computing environment. 11 | """ 12 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 13 | if custom_module_path: 14 | setup_custom_environment(custom_module_path) 15 | else: 16 | # The default setup is a no-op 17 | pass 18 | 19 | 20 | def setup_custom_environment(custom_module_path): 21 | """Load custom environment setup from a Python source file and run the setup 22 | function. 23 | """ 24 | module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) 25 | assert hasattr(module, "setup_environment") and callable( 26 | module.setup_environment 27 | ), ( 28 | "Custom environment module defined in {} does not have the " 29 | "required callable attribute 'setup_environment'." 30 | ).format( 31 | custom_module_path 32 | ) 33 | module.setup_environment() 34 | 35 | 36 | # Force environment setup when this module is imported 37 | setup_environment() 38 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/imports.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | if torch._six.PY3: 5 | import importlib 6 | import importlib.util 7 | import sys 8 | 9 | 10 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 11 | def import_file(module_name, file_path, make_importable=False): 12 | spec = importlib.util.spec_from_file_location(module_name, file_path) 13 | module = importlib.util.module_from_spec(spec) 14 | spec.loader.exec_module(module) 15 | if make_importable: 16 | sys.modules[module_name] = module 17 | return module 18 | else: 19 | import imp 20 | 21 | def import_file(module_name, file_path, make_importable=None): 22 | module = imp.load_source(module_name, file_path) 23 | return module 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | 7 | def setup_logger(name, save_dir, distributed_rank): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | ch = logging.StreamHandler(stream=sys.stdout) 14 | ch.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 16 | ch.setFormatter(formatter) 17 | logger.addHandler(ch) 18 | 19 | if save_dir: 20 | fh = logging.FileHandler(os.path.join(save_dir, "log.txt")) 21 | fh.setLevel(logging.DEBUG) 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | 25 | return logger 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | 7 | 8 | class SmoothedValue(object): 9 | """Track a series of values and provide access to smoothed values over a 10 | window or the global series average. 
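median and avg are computed over the most recent window_size updates, while
global_avg is total / count over every update ever seen.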
11 | """ 12 | 13 | def __init__(self, window_size=20): 14 | self.deque = deque(maxlen=window_size) 15 | self.series = [] 16 | self.total = 0.0 17 | self.count = 0 18 | 19 | def update(self, value): 20 | self.deque.append(value) 21 | self.series.append(value) 22 | self.count += 1 23 | self.total += value 24 | 25 | @property 26 | def median(self): 27 | d = torch.tensor(list(self.deque)) 28 | return d.median().item() 29 | 30 | @property 31 | def avg(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.mean().item() 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger(object): 41 | def __init__(self, delimiter="\t"): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | if attr in self.__dict__: 56 | return self.__dict__[attr] 57 | raise AttributeError("'{}' object has no attribute '{}'".format( 58 | type(self).__name__, attr)) 59 | 60 | def __str__(self): 61 | loss_str = [] 62 | for name, meter in self.meters.items(): 63 | loss_str.append( 64 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 65 | ) 66 | return self.delimiter.join(loss_str) 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import os 4 | 5 | 6 | def mkdir(path): 7 | try: 8 | os.makedirs(path) 9 | except OSError as e: 10 | if e.errno != errno.EEXIST: 11 | raise 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_serialization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | import logging 4 | 5 | import pdb 6 | import torch 7 | 8 | from maskrcnn_benchmark.utils.imports import import_file 9 | 10 | 11 | def align_and_update_state_dicts(model_state_dict, loaded_state_dict): 12 | """ 13 | Strategy: suppose that the models that we will create will have prefixes appended 14 | to each of its keys, for example due to an extra level of nesting that the original 15 | pre-trained weights from ImageNet won't contain. For example, model.state_dict() 16 | might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains 17 | res2.conv1.weight. We thus want to match both parameters together. 18 | For that, we look for each model weight, look among all loaded keys if there is one 19 | that is a suffix of the current weight name, and use it if that's the case. 20 | If multiple matches exist, take the one with longest size 21 | of the corresponding name. For example, for the same model as before, the pretrained 22 | weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, 23 | we want to match backbone[0].body.conv1.weight to conv1.weight, and 24 | backbone[0].body.res2.conv1.weight to res2.conv1.weight. 
25 | """ 26 | current_keys = sorted(list(model_state_dict.keys())) 27 | loaded_keys = sorted(list(loaded_state_dict.keys())) 28 | 29 | # get a matrix of string matches, where each (i, j) entry correspond to the size of the 30 | # loaded_key string, if it matches 31 | match_matrix = [ 32 | len(j) if i.endswith(j) else 0 for i in current_keys for j in loaded_keys 33 | ] 34 | match_matrix = torch.as_tensor(match_matrix).view( 35 | len(current_keys), len(loaded_keys) 36 | ) 37 | max_match_size, idxs = match_matrix.max(1) 38 | # remove indices that correspond to no-match 39 | idxs[max_match_size == 0] = -1 40 | 41 | # used for logging 42 | max_size = max([len(key) for key in current_keys]) if current_keys else 1 43 | max_size_loaded = max([len(key) for key in loaded_keys]) if loaded_keys else 1 44 | log_str_template = "{: <{}} loaded from {: <{}} of shape {}" 45 | logger = logging.getLogger(__name__) 46 | for idx_new, idx_old in enumerate(idxs.tolist()): 47 | if idx_old == -1: 48 | continue 49 | key = current_keys[idx_new] 50 | key_old = loaded_keys[idx_old] 51 | model_state_dict[key] = loaded_state_dict[key_old] 52 | logger.info( 53 | log_str_template.format( 54 | key, 55 | max_size, 56 | key_old, 57 | max_size_loaded, 58 | tuple(loaded_state_dict[key_old].shape), 59 | ) 60 | ) 61 | 62 | 63 | def strip_prefix_if_present(state_dict, prefix): 64 | keys = sorted(state_dict.keys()) 65 | if not all(key.startswith(prefix) for key in keys): 66 | return state_dict 67 | stripped_state_dict = OrderedDict() 68 | for key, value in state_dict.items(): 69 | stripped_state_dict[key.replace(prefix, "")] = value 70 | return stripped_state_dict 71 | 72 | 73 | def load_state_dict(model, loaded_state_dict): 74 | model_state_dict = model.state_dict() 75 | # if the state_dict comes from a model that was wrapped in a 76 | # DataParallel or DistributedDataParallel during serialization, 77 | # remove the "module" prefix before performing the matching 78 | loaded_state_dict = strip_prefix_if_present(loaded_state_dict, prefix="module.") 79 | align_and_update_state_dicts(model_state_dict, loaded_state_dict) 80 | 81 | # use strict loading 82 | model.load_state_dict(model_state_dict) 83 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | import sys 4 | 5 | from torch.utils.model_zoo import _download_url_to_file 6 | from torch.utils.model_zoo import urlparse 7 | from torch.utils.model_zoo import HASH_REGEX 8 | 9 | from maskrcnn_benchmark.utils.comm import is_main_process 10 | from maskrcnn_benchmark.utils.comm import synchronize 11 | 12 | 13 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py 14 | # but with a few improvements and modifications 15 | def cache_url(url, model_dir=None, progress=True): 16 | r"""Loads the Torch serialized object at the given URL. 17 | If the object is already present in `model_dir`, it's deserialized and 18 | returned. The filename part of the URL should follow the naming convention 19 | ``filename-.ext`` where ```` is the first eight or more 20 | digits of the SHA256 hash of the contents of the file. The hash is used to 21 | ensure unique names and to verify the contents of the file. 22 | The default value of `model_dir` is ``$TORCH_HOME/models`` where 23 | ``$TORCH_HOME`` defaults to ``~/.torch``. 
The default directory can be 24 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 25 | Args: 26 | url (string): URL of the object to download 27 | model_dir (string, optional): directory in which to save the object 28 | progress (bool, optional): whether or not to display a progress bar to stderr 29 | Example: 30 | >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') 31 | """ 32 | if model_dir is None: 33 | torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch')) 34 | model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models')) 35 | if not os.path.exists(model_dir): 36 | os.makedirs(model_dir) 37 | parts = urlparse(url) 38 | filename = os.path.basename(parts.path) 39 | if filename == "model_final.pkl": 40 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename 41 | # so make the full path the filename by replacing / with _ 42 | filename = parts.path.replace("/", "_") 43 | cached_file = os.path.join(model_dir, filename) 44 | if not os.path.exists(cached_file) and is_main_process(): 45 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 46 | hash_prefix = HASH_REGEX.search(filename) 47 | if hash_prefix is not None: 48 | hash_prefix = hash_prefix.group(1) 49 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, 50 | # which matches the hash PyTorch uses. So we skip the hash matching 51 | # if the hash_prefix is less than 6 characters 52 | if len(hash_prefix) < 6: 53 | hash_prefix = None 54 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 55 | synchronize() 56 | return cached_file 57 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | def _register_generic(module_dict, module_name, module): 5 | assert module_name not in module_dict 6 | module_dict[module_name] = module 7 | 8 | 9 | class Registry(dict): 10 | ''' 11 | A helper class for managing registered modules; it extends a dictionary 12 | and provides a register function. 13 | 14 | E.g. creating a registry: 15 | some_registry = Registry({"default": default_module}) 16 | 17 | There are two ways of registering new modules: 18 | 1): the normal way is just calling the register function: 19 | def foo(): 20 | ... 21 | some_registry.register("foo_module", foo) 22 | 2): used as a decorator when declaring the module: 23 | @some_registry.register("foo_module") 24 | @some_registry.register("foo_module_nickname") 25 | def foo(): 26 | ... 27 | 28 | Accessing a module is just like using a dictionary, e.g.: 29 | f = some_registry["foo_module"] 30 | ''' 31 | def __init__(self, *args, **kwargs): 32 | super(Registry, self).__init__(*args, **kwargs) 33 | 34 | def register(self, module_name, module=None): 35 | # used as function call 36 | if module is not None: 37 | _register_generic(self, module_name, module) 38 | return 39 | 40 | # used as decorator 41 | def register_fn(fn): 42 | _register_generic(self, module_name, fn) 43 | return fn 44 | 45 | return register_fn 46 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if torch.cuda.is_available() and CUDA_HOME is not None: 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "maskrcnn_benchmark._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="maskrcnn_benchmark", 61 | version="0.1", 62 | author="fmassa", 63 | url="https://github.com/facebookresearch/maskrcnn-benchmark", 64 | description="object detection in pytorch", 65 | packages=find_packages(exclude=("configs", "tests",)), 66 | # install_requires=requirements, 67 | ext_modules=get_extensions(), 68 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 69 | ) 70 | --------------------------------------------------------------------------------
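As a closing note, a quick way to verify the extension compiled by this setup script is to exercise one of the wrapped ops; a minimal smoke test, assuming the package was built in place (for example with `python setup.py build_ext develop`):

import torch
from maskrcnn_benchmark.layers import nms  # thin wrapper over the compiled maskrcnn_benchmark._C

boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                      [1.0, 1.0, 11.0, 11.0],    # heavily overlaps the first box
                      [50.0, 50.0, 60.0, 60.0]])
scores = torch.tensor([0.9, 0.8, 0.7])

keep = nms(boxes, scores, 0.5)  # same call pattern as boxlist_nms in boxlist_ops.py
print(keep)                     # expect indices 0 and 2; box 1 is suppressed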