├── INSTALL.md ├── LICENSE ├── README.md ├── cfgs ├── CIHP │ ├── e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml │ └── e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml ├── DensePose_COCO │ └── e2e_parsing_rcnn_R-50-FPN_s1x_ms.yaml └── MHP-v2 │ ├── e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml │ └── e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml ├── ckpts └── README.md ├── data ├── output.png └── parsing_rcnn.png ├── make.sh ├── models ├── __init__.py ├── imagenet │ ├── __init__.py │ ├── hrnet.py │ ├── mobilenet_v1.py │ ├── mobilenet_v2.py │ ├── mobilenet_v3.py │ ├── resnet.py │ ├── resnext.py │ ├── utils.py │ └── vovnet.py └── ops │ ├── __init__.py │ ├── adjust_smooth_l1_loss.py │ ├── affine.py │ ├── batch_norm.py │ ├── bilinear_interpolation2d.py │ ├── boxes.py │ ├── context_block.py │ ├── conv2d_samepadding.py │ ├── conv2d_ws.py │ ├── csrc │ ├── PoolPointsInterp.h │ ├── ROIAlign.h │ ├── ROIPool.h │ ├── SigmoidFocalLoss.h │ ├── cpu │ │ ├── ROIAlign_cpu.cpp │ │ ├── nms_cpu.cpp │ │ └── vision.h │ ├── cuda │ │ ├── PoolPointsInterp_cuda.cu │ │ ├── ROIAlign_cuda.cu │ │ ├── ROIPool_cuda.cu │ │ ├── SigmoidFocalLoss_cuda.cu │ │ ├── deform_conv_cuda.cu │ │ ├── deform_conv_kernel_cuda.cu │ │ ├── deform_pool_cuda.cu │ │ ├── deform_pool_kernel_cuda.cu │ │ ├── ml_nms.cu │ │ ├── nms.cu │ │ └── vision.h │ ├── deform_conv.h │ ├── deform_pool.h │ ├── ml_nms.h │ ├── nms.h │ └── vision.cpp │ ├── cython_bbox.c │ ├── cython_bbox.pyx │ ├── cython_nms.c │ ├── cython_nms.pyx │ ├── dcn │ ├── __init__.py │ ├── deform_conv_func.py │ ├── deform_conv_module.py │ ├── deform_pool_func.py │ └── deform_pool_module.py │ ├── dropblock.py │ ├── iou_loss.py │ ├── l2_loss.py │ ├── l2norm.py │ ├── label_smoothing.py │ ├── lovasz_hinge_loss.py │ ├── misc.py │ ├── mixture_batchnorm.py │ ├── nms.py │ ├── nonlocal2d.py │ ├── pool_points_interp.py │ ├── scale.py │ ├── setup_rcnn.py │ ├── setup_ssd.py │ ├── sigmoid_focal_loss.py │ ├── smooth_l1_loss.py │ └── squeeze_excitation.py ├── rcnn ├── __init__.py ├── core │ ├── __init__.py │ ├── config.py │ ├── test.py │ └── test_engine.py ├── datasets │ ├── __init__.py │ ├── dataset.py │ ├── dataset_catalog.py │ ├── evaluation.py │ └── transform.py ├── modeling │ ├── backbone │ │ ├── HRNet.py │ │ ├── MobileNet_v1.py │ │ ├── MobileNet_v2.py │ │ ├── MobileNet_v3.py │ │ ├── ResNeXt.py │ │ ├── ResNet.py │ │ ├── VoVNet.py │ │ └── __init__.py │ ├── cascade_rcnn │ │ ├── __init__.py │ │ ├── cascade_rcnn.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── convfc_heads.py │ │ │ └── mlp_heads.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── outputs.py │ ├── fast_rcnn │ │ ├── __init__.py │ │ ├── fast_rcnn.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── convfc_heads.py │ │ │ └── mlp_heads.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── outputs.py │ ├── fpn │ │ ├── FPN.py │ │ ├── HRFPN.py │ │ └── __init__.py │ ├── keypoint_rcnn │ │ ├── __init__.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── convx_heads.py │ │ │ └── gce_heads.py │ │ ├── inference.py │ │ ├── keypoint_rcnn.py │ │ ├── loss.py │ │ └── outputs.py │ ├── mask_rcnn │ │ ├── __init__.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ └── convx_heads.py │ │ ├── inference.py │ │ ├── loss.py │ │ ├── mask_rcnn.py │ │ └── outputs.py │ ├── model_builder.py │ ├── parsing_rcnn │ │ ├── __init__.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── convx_heads.py │ │ │ └── gce_heads.py │ │ ├── inference.py │ │ ├── loss.py │ │ ├── outputs.py │ │ └── parsing_rcnn.py │ ├── registry.py │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── rpn.py │ └── uv_rcnn │ │ ├── 
heads │ │ ├── __init__.py │ │ ├── convx_heads.py │ │ └── gce_heads.py │ │ ├── inference.py │ │ ├── loss.py │ │ ├── outputs.py │ │ └── uv_rcnn.py ├── ops │ ├── __init__.py │ ├── deform_pool.py │ ├── roi_align.py │ └── roi_pool.py └── utils │ ├── __init__.py │ ├── balanced_positive_negative_sampler.py │ ├── box_coder.py │ ├── matcher.py │ ├── misc.py │ └── poolers.py ├── requirements.txt ├── tools ├── _init_paths.py ├── test_net.py └── train_net.py ├── utils ├── __init__.py ├── checkpointer.py ├── collections.py ├── colormap.py ├── comm.py ├── data │ ├── __init__.py │ ├── collate_batch.py │ ├── dataset_catalog.py │ ├── datasets │ │ ├── __init__.py │ │ ├── coco.py │ │ └── concat_dataset.py │ ├── evaluation │ │ ├── densepose_cocoeval.py │ │ ├── densepose_methods.py │ │ └── parsing_eval.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── grouped_batch_sampler.py │ │ ├── iteration_based_batch_sampler.py │ │ ├── range_sampler.py │ │ └── repeat_factor.py │ ├── structures │ │ ├── __init__.py │ │ ├── bounding_box.py │ │ ├── boxlist_ops.py │ │ ├── densepose_uv.py │ │ ├── image_list.py │ │ ├── keypoint.py │ │ ├── parsing.py │ │ └── segmentation_mask.py │ └── transforms │ │ ├── __init__.py │ │ └── transforms.py ├── image.py ├── logger.py ├── lr_scheduler.py ├── measure.py ├── misc.py ├── net.py ├── optimizer.py ├── registry.py ├── subprocess.py ├── timer.py └── vis.py └── weights └── README.md /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ``` 4 | # install pytorch 1.1 and torchvision 5 | sudo pip3 install torch==1.1 torchvision 6 | 7 | # install apex 8 | cd $INSTALL_DIR 9 | git clone https://github.com/NVIDIA/apex.git 10 | cd apex 11 | sudo python setup.py install --cuda_ext --cpp_ext 12 | 13 | # clone Parsing-R-CNN 14 | git clone https://github.com/soeaver/Parsing-R-CNN.git 15 | 16 | # install other requirements 17 | pip3 install -r requirements.txt 18 | 19 | # make ops 20 | cd Parsing-R-CNN 21 | sh make.sh 22 | 23 | # make cocoapi 24 | cd cocoapi/PythonAPI 25 | make 26 | cd ../../ 27 | ln -s cocoapi/PythonAPI/pycocotools/ ./ 28 | ``` 29 | 30 | ## Data and Pre-train weights 31 | 32 | Make sure the files are organized in the following structure: 33 | 34 | ``` 35 | ├─data 36 | │ ├─coco 37 | │  │ ├─images 38 | │  │ │ ├─train2017 39 | │  │ │ ├─val2017 40 | │ │ ├─annotations 41 | │  │ │ ├─DensePoseData 42 | │  │ │ │ ├─densepose_coco_train2017.json 43 | │  │ │ │ ├─densepose_coco_val2017.json 44 | │  │ │ │ ├─densepose_coco_test2017.json 45 | | | 46 | │ ├─CIHP 47 | │  │ ├─train_img 48 | │  │ ├─train_parsing 49 | │  │ ├─train_seg 50 | │  │ ├─val_img 51 | │  │ ├─val_parsing 52 | │  │ ├─val_seg 53 | │ │ ├─annotations 54 | │  │ │ ├─CIHP_train.json 55 | │  │ │ ├─CIHP_val.json 56 | | | 57 | │ ├─MHP-v2 58 | │  │ ├─train_img 59 | │  │ ├─train_parsing 60 | │  │ ├─train_seg 61 | │  │ ├─val_img 62 | │  │ ├─val_parsing 63 | │  │ ├─val_seg 64 | │ │ ├─annotations 65 | │  │ │ ├─MHP-v2_train.json 66 | │  │ │ ├─MHP-v2_val.json 67 | | 68 | ├─weights 69 | ├─resnet50_caffe.pth 70 | ├─resnet101_caffe.pth 71 | ├─resnext101_32x8d-8ba56ff5.pth 72 | 73 | ``` 74 | 75 | - DensePose estimation uses the original COCO images.
76 | - For training and evaluating DensePose estimation with Parsing R-CNN, you need to fetch the DensePose data following the [original repo](https://github.com/facebookresearch/DensePose/blob/master/INSTALL.md#fetch-densepose-data) 77 | 78 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Parsing-R-CNN 2 | 3 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/parsing-r-cnn-for-instance-level-human/human-part-segmentation-on-cihp)](https://paperswithcode.com/sota/human-part-segmentation-on-cihp?p=parsing-r-cnn-for-instance-level-human) 4 | 5 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/parsing-r-cnn-for-instance-level-human/pose-estimation-on-densepose-coco)](https://paperswithcode.com/sota/pose-estimation-on-densepose-coco?p=parsing-r-cnn-for-instance-level-human) 6 | 7 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/parsing-r-cnn-for-instance-level-human/human-part-segmentation-on-mhp-v20)](https://paperswithcode.com/sota/human-part-segmentation-on-mhp-v20?p=parsing-r-cnn-for-instance-level-human) 8 | 9 | **(New!)** Official implementation of **Parsing R-CNN for Instance-Level Human Analysis (CVPR 2019)** 10 | 11 | ## Citing Parsing R-CNN 12 | 13 | If you use Parsing R-CNN, please cite it with the following BibTeX entry. 14 | 15 | ```BibTeX 16 | @inproceedings{yang2019cvpr, 17 | title = {Parsing R-CNN for Instance-Level Human Analysis}, 18 | author = {Lu Yang and Qing Song and Zhihui Wang and Ming Jiang}, 19 | booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 20 | year = {2019} 21 | } 22 | 23 | ``` 24 | 25 | In this repository, we release the Parsing R-CNN code in PyTorch. 26 | 27 | - Parsing R-CNN architecture: 28 |

29 | 30 | - Parsing R-CNN output: 31 |

32 | 33 | 34 | ## Installation 35 | - 8 x TITAN RTX GPUs 36 | - PyTorch 1.1 37 | - Python 3.6.8 38 | 39 | Install Parsing R-CNN following [INSTALL.md](https://github.com/soeaver/Parsing-R-CNN/blob/master/INSTALL.md#install). 40 | 41 | 42 | ## Dataset 43 | 44 | You need to download the datasets and annotations in the format this repo expects: 45 | 46 | - [CIHP](https://drive.google.com/open?id=1OLBd23ufm6CU8CZmLEYMdF-x2b8mRgxV) 47 | 48 | - [MHP-v2](coming soon) 49 | 50 | - [DensePoseData](https://drive.google.com/open?id=1WiTLYVIgMyCDENXHPVEWW7qbZ-3EBjbt) (using the original [MSCOCO2017](http://cocodataset.org/#download) images) 51 | 52 | Then follow the [data structure](https://github.com/soeaver/Parsing-R-CNN/blob/master/INSTALL.md#data-and-pre-train-weights) to train or evaluate Parsing R-CNN models. 53 | 54 | 55 | ## Results and Models 56 | 57 | **On CIHP val** 58 | 59 | | Backbone | LR | Det AP | mIoU |Parsing (APp50/APvol/PCP50) | DOWNLOAD | 60 | |------------|:----:|:------:|:----:|:--------------------------:| :-------:| 61 | | R-50-FPN | 1x | 65.8 | 52.8 | 57.2/51.2/55.4 | | 62 | | R-50-FPN | 3x | 68.7 | 56.0 | 64.1/54.1/60.7 | [GoogleDrive](https://drive.google.com/open?id=16bASrD7AoCADKzXynIgmdyzmbuzCfAUL)| 63 | 64 | 65 | **On MHP-v2 val** 66 | 67 | | Backbone | LR | Det AP | mIoU |Parsing (APp50/APvol/PCP50) | DOWNLOAD | 68 | |------------|:----:|:------:|:----:|:--------------------------:| :-------:| 69 | | R-50-FPN | 1x | 66.5 | 34.0 | 19.9/36.7/32.4 | | 70 | | R-50-FPN | 3x | 69.0 | 36.1 | 27.4/40.5/38.3 | [GoogleDrive](https://drive.google.com/open?id=1rbSNP4_DoJdNK4l6KHrthO0x4WOFgHGy)| 71 | 72 | 73 | **On DensePose_COCO val** 74 | 75 | | Backbone | LR | Det AP |UV AP (AP/AP50/AP75/APm/APl)| DOWNLOAD | 76 | |------------|:----:|:------:|:--------------------------:| :-------:| 77 | | R-50-FPN | s1x | 57.4 | 59.3/90.5/68.7/56.2/60.8 | [GoogleDrive](https://drive.google.com/open?id=1YQygKoOb5SbZWYnF7f9vEpC_NenpMhH5)| 78 | 79 | 80 | - The new GPSm metric is adopted for evaluating UV results 81 | 82 | 83 | **ImageNet pretrained weights** 84 | 85 | - [R-50](https://drive.google.com/open?id=1EtqFhrFTdBJNbp67effArVrTNx4q_ELr) 86 | - [R-50-GN](https://drive.google.com/open?id=1LzcVD7aADhXXY32DdtKhaY9hTXaduhlg) 87 | - [X-101-32x8d](https://drive.google.com/open?id=1c4OSVZIZtDT49B0DTC0tK3vcRgJpzR9n) 88 | 89 | 90 | ## Visualization 91 | 92 | Coming soon. 93 | 94 | 95 | ## Training 96 | 97 | To train a model with 8 GPUs, run: 98 | ``` 99 | python -m torch.distributed.launch --nproc_per_node=8 tools/train_net.py --cfg cfgs/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml 100 | ``` 101 | 102 | 103 | ## Evaluation 104 | 105 | ### Multi-GPU evaluation 106 | ``` 107 | python tools/test_net.py --cfg ckpts/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml --gpu_id 0,1,2,3,4,5,6,7 108 | ``` 109 | 110 | ### Single-GPU evaluation 111 | ``` 112 | python tools/test_net.py --cfg ckpts/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml --gpu_id 0 113 | ``` 114 | See the end of this README for a sketch of the expected `ckpts/` layout for these commands. 115 | 116 | ## License 117 | Parsing-R-CNN is released under the [MIT license](https://github.com/soeaver/Parsing-R-CNN/blob/master/LICENSE).
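The evaluation commands above read the config from `ckpts/`, which matches the `CKPT` path set in the training configs. One possible layout for the released CIHP 3x model is sketched below; `model_latest.pth` is only a placeholder name, keep whatever file name the downloaded checkpoint actually has:

```
ckpts/
└── CIHP/
    └── e2e_parsing_rcnn_R-50-FPN_3x_ms/
        ├── e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml   # copy of cfgs/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml
        └── model_latest.pth                       # downloaded weights (placeholder name)
```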
118 | -------------------------------------------------------------------------------- /cfgs/CIHP/e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml: -------------------------------------------------------------------------------- 1 | # bbox_AP: 65.8/92.8/73.7/3.4/46.8/68.7; 2 | # parsing: (mIoU:52.8/AP50:57.2/APvol:51.2/PCP50:55.4) 3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717] 4 | PIXEL_STDS: [1.0, 1.0, 1.0] 5 | CKPT: 'ckpts/CIHP/e2e_parsing_rcnn_R-50-FPN_1x_ms' 6 | MODEL: 7 | FPN_ON: True 8 | FASTER_ON: True 9 | PARSING_ON: True 10 | NUM_CLASSES: 2 11 | CONV1_RGB2BGR: False # caffe style 12 | BACKBONE: 13 | CONV_BODY: "resnet" 14 | RESNET: # caffe style 15 | LAYERS: (3, 4, 6, 3) 16 | RPN: 17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 18 | PRE_NMS_TOP_N_TRAIN: 2000 19 | PRE_NMS_TOP_N_TEST: 1000 20 | POST_NMS_TOP_N_TEST: 1000 21 | FPN_POST_NMS_TOP_N_TEST: 1000 22 | FAST_RCNN: 23 | ROI_XFORM_RESOLUTION: (7, 7) 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | PRCNN: 26 | ROI_PARSING_HEAD: "roi_gce_head" 27 | NUM_PARSING: 20 28 | ROI_STRIDES: [4] 29 | ROI_SIZE_PER_IMG: 16 30 | ROI_XFORM_RESOLUTION: (32, 32) 31 | ROI_XFORM_SAMPLING_RATIO: 2 32 | RESOLUTION: (128, 128) 33 | LOSS_WEIGHT: 2.0 # double loss weight 34 | GCE_HEAD: 35 | NUM_CONVS_AFTER_ASPPV3: 4 36 | USE_NL: True 37 | SOLVER: 38 | WEIGHT_DECAY: 0.0001 39 | BASE_LR: 0.02 40 | GAMMA: 0.1 41 | WARM_UP_ITERS: 500 42 | WARM_UP_FACTOR: 0.01 43 | MAX_ITER: 45000 44 | STEPS: [30000, 40000] 45 | SNAPSHOT_ITERS: 5000 46 | TRAIN: 47 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth 48 | DATASETS: ('CIHP_train', ) 49 | SCALES: (512, 640, 704, 768, 800, 864) 50 | MAX_SIZE: 1400 51 | TEST: 52 | DATASETS: ('CIHP_val',) 53 | SCALE: 800 54 | MAX_SIZE: 1333 55 | -------------------------------------------------------------------------------- /cfgs/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml: -------------------------------------------------------------------------------- 1 | # bbox_AP: 68.7/93.0/76.2/2.0/48.0/71.8; 2 | # parsing: (mIoU:56.0/AP50:64.1/APvol:54.1/PCP50:60.7) 3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717] 4 | PIXEL_STDS: [1.0, 1.0, 1.0] 5 | CKPT: 'ckpts/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms' 6 | MODEL: 7 | FPN_ON: True 8 | FASTER_ON: True 9 | PARSING_ON: True 10 | NUM_CLASSES: 2 11 | CONV1_RGB2BGR: False # caffe style 12 | BACKBONE: 13 | CONV_BODY: "resnet" 14 | RESNET: # caffe style 15 | LAYERS: (3, 4, 6, 3) 16 | RPN: 17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 18 | PRE_NMS_TOP_N_TRAIN: 2000 19 | PRE_NMS_TOP_N_TEST: 1000 20 | POST_NMS_TOP_N_TEST: 1000 21 | FPN_POST_NMS_TOP_N_TEST: 1000 22 | FAST_RCNN: 23 | ROI_XFORM_RESOLUTION: (7, 7) 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | PRCNN: 26 | ROI_PARSING_HEAD: "roi_gce_head" 27 | NUM_PARSING: 20 28 | ROI_STRIDES: [4] 29 | ROI_SIZE_PER_IMG: 16 30 | ROI_XFORM_RESOLUTION: (32, 32) 31 | ROI_XFORM_SAMPLING_RATIO: 2 32 | RESOLUTION: (128, 128) 33 | LOSS_WEIGHT: 2.0 # double loss weight 34 | GCE_HEAD: 35 | NUM_CONVS_AFTER_ASPPV3: 4 36 | USE_NL: True 37 | SOLVER: 38 | WEIGHT_DECAY: 0.0001 39 | BASE_LR: 0.02 40 | GAMMA: 0.1 41 | WARM_UP_ITERS: 500 42 | WARM_UP_FACTOR: 0.01 43 | MAX_ITER: 135000 44 | STEPS: [105000, 125000] 45 | TRAIN: 46 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth 47 | DATASETS: ('CIHP_train', ) 48 | SCALES: (512, 640, 704, 768, 800, 864) 49 | MAX_SIZE: 1400 50 | TEST: 51 | DATASETS: ('CIHP_val',) 52 | SCALE: 800 53 | MAX_SIZE: 1333 54 | -------------------------------------------------------------------------------- /cfgs/DensePose_COCO/e2e_parsing_rcnn_R-50-FPN_s1x_ms.yaml: 
-------------------------------------------------------------------------------- 1 | # bbox_AP: 57.4/87.6/62.7/30.3/56.2/70.2; 2 | # uv_AP (GPSm): 59.3/90.5/68.7/52.6/60.8; 3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717] 4 | PIXEL_STDS: [1.0, 1.0, 1.0] 5 | CKPT: 'ckpts/DensePose_COCO/e2e_parsing_rcnn_R-50-FPN_s1x_ms' 6 | MODEL: 7 | FPN_ON: True 8 | FASTER_ON: True 9 | UV_ON: True 10 | NUM_CLASSES: 2 11 | CONV1_RGB2BGR: False # caffe style 12 | BACKBONE: 13 | CONV_BODY: "resnet" 14 | RESNET: # caffe style 15 | LAYERS: (3, 4, 6, 3) 16 | RPN: 17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 18 | PRE_NMS_TOP_N_TRAIN: 2000 19 | PRE_NMS_TOP_N_TEST: 1000 20 | POST_NMS_TOP_N_TEST: 1000 21 | FPN_POST_NMS_TOP_N_TEST: 1000 22 | FAST_RCNN: 23 | ROI_XFORM_RESOLUTION: (7, 7) 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | UVRCNN: 26 | ROI_UV_HEAD: "roi_gce_head" 27 | ROI_STRIDES: [4] 28 | ROI_SIZE_PER_IMG: 32 29 | ROI_XFORM_RESOLUTION: (32, 32) 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | RESOLUTION: (128, 128) 32 | INDEX_WEIGHTS : 2.0 33 | PART_WEIGHTS : 0.3 34 | POINT_REGRESSION_WEIGHTS : 0.1 35 | GCE_HEAD: 36 | NUM_CONVS_AFTER_ASPPV3: 4 37 | USE_NL: True 38 | SOLVER: 39 | WEIGHT_DECAY: 0.0001 40 | BASE_LR: 0.002 41 | GAMMA: 0.1 42 | WARM_UP_ITERS: 500 43 | MAX_ITER: 130000 44 | STEPS: [100000, 120000] 45 | TRAIN: 46 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth 47 | DATASETS: ('dense_coco_2017_train', ) 48 | SCALES: (512, 640, 704, 768, 800, 864) 49 | MAX_SIZE: 1400 50 | TEST: 51 | DATASETS: ('dense_coco_2017_val',) 52 | SCALE: 800 53 | MAX_SIZE: 1333 54 | -------------------------------------------------------------------------------- /cfgs/MHP-v2/e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml: -------------------------------------------------------------------------------- 1 | # bbox_AP: 66.5/93.8/76.8/-1.0/52.0/66.7; 2 | # parsing: (mIoU:34.0/AP50:19.9/APvol:37.6/PCP50:32.4) 3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717] 4 | PIXEL_STDS: [1.0, 1.0, 1.0] 5 | CKPT: 'ckpts/MHP-v2/e2e_parsing_rcnn_R-50-FPN_1x_ms' 6 | MODEL: 7 | FPN_ON: True 8 | FASTER_ON: True 9 | PARSING_ON: True 10 | NUM_CLASSES: 2 11 | CONV1_RGB2BGR: False # caffe style 12 | BACKBONE: 13 | CONV_BODY: "resnet" 14 | RESNET: # caffe style 15 | LAYERS: (3, 4, 6, 3) 16 | RPN: 17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 18 | PRE_NMS_TOP_N_TRAIN: 2000 19 | PRE_NMS_TOP_N_TEST: 1000 20 | POST_NMS_TOP_N_TEST: 1000 21 | FPN_POST_NMS_TOP_N_TEST: 1000 22 | FAST_RCNN: 23 | ROI_XFORM_RESOLUTION: (7, 7) 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | PRCNN: 26 | ROI_PARSING_HEAD: "roi_gce_head" 27 | NUM_PARSING: 59 28 | ROI_STRIDES: [4] 29 | ROI_SIZE_PER_IMG: 16 30 | ROI_XFORM_RESOLUTION: (32, 32) 31 | ROI_XFORM_SAMPLING_RATIO: 2 32 | RESOLUTION: (128, 128) 33 | SEMSEG_SCORE_THRESH: 0.05 34 | LOSS_WEIGHT: 2.0 # double loss weight 35 | GCE_HEAD: 36 | NUM_CONVS_AFTER_ASPPV3: 4 37 | USE_NL: True 38 | SOLVER: 39 | WEIGHT_DECAY: 0.0001 40 | BASE_LR: 0.02 41 | GAMMA: 0.1 42 | WARM_UP_ITERS: 500 43 | WARM_UP_FACTOR: 0.01 44 | MAX_ITER: 24000 45 | STEPS: [15000, 20000] 46 | SNAPSHOT_ITERS: 5000 47 | TRAIN: 48 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth 49 | DATASETS: ('MHP-v2_train', ) 50 | SCALES: (512, 640, 704, 768, 800, 864) 51 | MAX_SIZE: 1400 52 | TEST: 53 | DATASETS: ('MHP-v2_val',) 54 | SCALE: 800 55 | MAX_SIZE: 1333 56 | -------------------------------------------------------------------------------- /cfgs/MHP-v2/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml: -------------------------------------------------------------------------------- 1 | # bbox_AP: 
69.0/94.1/78.8/-1.0/56.7/69.1; 2 | # parsing: (mIoU:36.1/AP50:27.4/APvol:40.5/PCP50:38.3) 3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717] 4 | PIXEL_STDS: [1.0, 1.0, 1.0] 5 | CKPT: 'ckpts/MHP-v2/e2e_parsing_rcnn_R-50-FPN_3x_ms' 6 | MODEL: 7 | FPN_ON: True 8 | FASTER_ON: True 9 | PARSING_ON: True 10 | NUM_CLASSES: 2 11 | CONV1_RGB2BGR: False # caffe style 12 | BACKBONE: 13 | CONV_BODY: "resnet" 14 | RESNET: # caffe style 15 | LAYERS: (3, 4, 6, 3) 16 | RPN: 17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 18 | PRE_NMS_TOP_N_TRAIN: 2000 19 | PRE_NMS_TOP_N_TEST: 1000 20 | POST_NMS_TOP_N_TEST: 1000 21 | FPN_POST_NMS_TOP_N_TEST: 1000 22 | FAST_RCNN: 23 | ROI_XFORM_RESOLUTION: (7, 7) 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | PRCNN: 26 | ROI_PARSING_HEAD: "roi_gce_head" 27 | NUM_PARSING: 59 28 | ROI_STRIDES: [4] 29 | ROI_SIZE_PER_IMG: 16 30 | ROI_XFORM_RESOLUTION: (32, 32) 31 | ROI_XFORM_SAMPLING_RATIO: 2 32 | RESOLUTION: (128, 128) 33 | SEMSEG_SCORE_THRESH: 0.05 34 | LOSS_WEIGHT: 2.0 # double loss weight 35 | GCE_HEAD: 36 | NUM_CONVS_AFTER_ASPPV3: 4 37 | USE_NL: True 38 | SOLVER: 39 | WEIGHT_DECAY: 0.0001 40 | BASE_LR: 0.02 41 | GAMMA: 0.1 42 | WARM_UP_ITERS: 500 43 | WARM_UP_FACTOR: 0.01 44 | MAX_ITER: 72000 45 | STEPS: [54000, 64000] 46 | TRAIN: 47 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth 48 | DATASETS: ('MHP-v2_train', ) 49 | SCALES: (512, 640, 704, 768, 800, 864) 50 | MAX_SIZE: 1400 51 | TEST: 52 | DATASETS: ('MHP-v2_val',) 53 | SCALE: 800 54 | MAX_SIZE: 1333 55 | -------------------------------------------------------------------------------- /ckpts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/ckpts/README.md -------------------------------------------------------------------------------- /data/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/data/output.png -------------------------------------------------------------------------------- /data/parsing_rcnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/data/parsing_rcnn.png -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # export CXXFLAGS="-std=c++11" 4 | # export CFLAGS="-std=c99" 5 | 6 | PYTHON=${PYTHON:-"python"} 7 | cd models/ops 8 | 9 | echo "Building bbox op..." 10 | python setup_ssd.py build_ext --inplace 11 | rm -rf build 12 | 13 | echo "Building rcnn op..." 
14 | if [ -d "build" ]; then 15 | rm -r build 16 | fi 17 | $PYTHON setup_rcnn.py build_ext --inplace 18 | rm -r build 19 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /models/imagenet/__init__.py: -------------------------------------------------------------------------------- 1 | from .hrnet import * 2 | from .mobilenet_v1 import * 3 | from .mobilenet_v2 import * 4 | from .mobilenet_v3 import * 5 | from .resnet import * 6 | from .resnext import * 7 | from .vovnet import * 8 | -------------------------------------------------------------------------------- /models/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .batch_norm import FrozenBatchNorm2d, NaiveSyncBatchNorm 2 | from .misc import Conv2d, ConvTranspose2d, BatchNorm2d, interpolate 3 | from .nms import nms, ml_nms 4 | from .l2_loss import l2_loss 5 | from .iou_loss import IOULoss 6 | from .scale import Scale 7 | from .smooth_l1_loss import smooth_l1_loss, smooth_l1_loss_LW 8 | from .adjust_smooth_l1_loss import AdjustSmoothL1Loss 9 | from .sigmoid_focal_loss import SigmoidFocalLoss 10 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 11 | from .dcn.deform_conv_module import DeformConv, DeformConvPack, ModulatedDeformConv, ModulatedDeformConvPack 12 | from .dcn.deform_pool_func import deform_roi_pooling 13 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack 14 | from .affine import AffineChannel2d 15 | from .bilinear_interpolation2d import BilinearInterpolation2d 16 | from .conv2d_samepadding import Conv2dSamePadding 17 | from .conv2d_ws import Conv2dWS 18 | from .dropblock import DropBlock2D 19 | from .l2norm import L2Norm 20 | from .label_smoothing import LabelSmoothing 21 | from .nonlocal2d import NonLocal2d, MS_NonLocal2d 22 | from .squeeze_excitation import SeConv2d, GDWSe2d 23 | from .pool_points_interp import PoolPointsInterp 24 | from .context_block import GlobalContextBlock 25 | from .mixture_batchnorm import MixtureBatchNorm2d, MixtureGroupNorm 26 | from .lovasz_hinge_loss import LovaszHinge 27 | -------------------------------------------------------------------------------- /models/ops/adjust_smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class AdjustSmoothL1Loss(nn.Module): 6 | 7 | def __init__(self, num_features, momentum=0.1, beta=1. 
/9): 8 | super(AdjustSmoothL1Loss, self).__init__() 9 | self.num_features = num_features 10 | self.momentum = momentum 11 | self.beta = beta 12 | self.register_buffer( 13 | 'running_mean', torch.empty(num_features).fill_(beta) 14 | ) 15 | self.register_buffer('running_var', torch.zeros(num_features)) 16 | 17 | def forward(self, inputs, target, size_average=True): 18 | 19 | n = torch.abs(inputs -target) 20 | with torch.no_grad(): 21 | if torch.isnan(n.var(dim=0)).sum().item() == 0: 22 | self.running_mean = self.running_mean.to(n.device) 23 | self.running_mean *= (1 - self.momentum) 24 | self.running_mean += (self.momentum * n.mean(dim=0)) 25 | self.running_var = self.running_var.to(n.device) 26 | self.running_var *= (1 - self.momentum) 27 | self.running_var += (self.momentum * n.var(dim=0)) 28 | 29 | 30 | beta = (self.running_mean - self.running_var) 31 | beta = beta.clamp(max=self.beta, min=1e-3) 32 | 33 | beta = beta.view(-1, self.num_features).to(n.device) 34 | cond = n < beta.expand_as(n) 35 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 36 | if size_average: 37 | return loss.mean() 38 | return loss.sum() 39 | 40 | -------------------------------------------------------------------------------- /models/ops/affine.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class AffineChannel2d(nn.Module): 6 | """ A simple channel-wise affine transformation operation """ 7 | def __init__(self, num_features): 8 | super().__init__() 9 | self.num_features = num_features 10 | self.weight = nn.Parameter(torch.Tensor(num_features)) 11 | self.bias = nn.Parameter(torch.Tensor(num_features)) 12 | self.weight.data.uniform_() 13 | self.bias.data.zero_() 14 | 15 | def forward(self, x): 16 | return x * self.weight.view(1, self.num_features, 1, 1) + \ 17 | self.bias.view(1, self.num_features, 1, 1) 18 | -------------------------------------------------------------------------------- /models/ops/batch_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | from torch import nn 4 | from torch.autograd.function import Function 5 | 6 | from utils.misc import get_world_size 7 | 8 | 9 | class FrozenBatchNorm2d(nn.Module): 10 | """ 11 | BatchNorm2d where the batch statistics and the affine parameters 12 | are fixed 13 | """ 14 | 15 | def __init__(self, n): 16 | super(FrozenBatchNorm2d, self).__init__() 17 | self.register_buffer("weight", torch.ones(n)) 18 | self.register_buffer("bias", torch.zeros(n)) 19 | self.register_buffer("running_mean", torch.zeros(n)) 20 | self.register_buffer("running_var", torch.ones(n)) 21 | 22 | def forward(self, x): 23 | # Cast all fixed parameters to half() if necessary 24 | if x.dtype == torch.float16: 25 | self.weight = self.weight.half() 26 | self.bias = self.bias.half() 27 | self.running_mean = self.running_mean.half() 28 | self.running_var = self.running_var.half() 29 | 30 | scale = self.weight * self.running_var.rsqrt() 31 | bias = self.bias - self.running_mean * scale 32 | scale = scale.reshape(1, -1, 1, 1) 33 | bias = bias.reshape(1, -1, 1, 1) 34 | return x * scale + bias 35 | 36 | 37 | class AllReduce(Function): 38 | @staticmethod 39 | def forward(ctx, input): 40 | input_list = [torch.zeros_like(input) for k in range(dist.get_world_size())] 41 | # Use allgather instead of allreduce since I don't trust in-place operations .. 
42 | dist.all_gather(input_list, input, async_op=False) 43 | inputs = torch.stack(input_list, dim=0) 44 | return torch.sum(inputs, dim=0) 45 | 46 | @staticmethod 47 | def backward(ctx, grad_output): 48 | dist.all_reduce(grad_output, async_op=False) 49 | return grad_output 50 | 51 | 52 | class NaiveSyncBatchNorm(nn.BatchNorm2d): 53 | """ 54 | This function is taken from the detectron2 repo. 55 | It can be seen here: 56 | https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/batch_norm.py 57 | 58 | `torch.nn.SyncBatchNorm` has known unknown bugs. 59 | It produces significantly worse AP (and sometimes goes NaN) 60 | when the batch size on each worker is quite different 61 | (e.g., when scale augmentation is used, or when it is applied to mask head). 62 | Use this implementation before `nn.SyncBatchNorm` is fixed. 63 | It is slower than `nn.SyncBatchNorm`. 64 | """ 65 | 66 | def forward(self, input): 67 | if get_world_size() == 1 or not self.training: 68 | return super().forward(input) 69 | 70 | assert input.shape[0] > 0, "SyncBatchNorm does not support empty inputs" 71 | C = input.shape[1] 72 | mean = torch.mean(input, dim=[0, 2, 3]) 73 | meansqr = torch.mean(input * input, dim=[0, 2, 3]) 74 | 75 | vec = torch.cat([mean, meansqr], dim=0) 76 | vec = AllReduce.apply(vec) * (1.0 / dist.get_world_size()) 77 | 78 | mean, meansqr = torch.split(vec, C) 79 | var = meansqr - mean * mean 80 | self.running_mean += self.momentum * (mean.detach() - self.running_mean) 81 | self.running_var += self.momentum * (var.detach() - self.running_var) 82 | 83 | invstd = torch.rsqrt(var + self.eps) 84 | scale = self.weight * invstd 85 | bias = self.bias - mean * scale 86 | scale = scale.reshape(1, -1, 1, 1) 87 | bias = bias.reshape(1, -1, 1, 1) 88 | return input * scale + bias 89 | -------------------------------------------------------------------------------- /models/ops/bilinear_interpolation2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class BilinearInterpolation2d(nn.Module): 9 | """Bilinear interpolation in space of scale. 10 | 11 | Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s:= up_scale 12 | 13 | Adapted from the CVPR'15 FCN code. 
14 | See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py 15 | """ 16 | def __init__(self, in_channels, out_channels, up_scale): 17 | super().__init__() 18 | assert in_channels == out_channels 19 | assert up_scale % 2 == 0, 'Scale should be even' 20 | self.in_channes = in_channels 21 | self.out_channels = out_channels 22 | self.up_scale = int(up_scale) 23 | self.padding = up_scale // 2 24 | 25 | def upsample_filt(size): 26 | factor = (size + 1) // 2 27 | if size % 2 == 1: 28 | center = factor - 1 29 | else: 30 | center = factor - 0.5 31 | og = np.ogrid[:size, :size] 32 | return ((1 - abs(og[0] - center) / factor) * 33 | (1 - abs(og[1] - center) / factor)) 34 | 35 | kernel_size = up_scale * 2 36 | bil_filt = upsample_filt(kernel_size) 37 | 38 | kernel = np.zeros( 39 | (in_channels, out_channels, kernel_size, kernel_size), dtype=np.float32 40 | ) 41 | kernel[range(in_channels), range(out_channels), :, :] = bil_filt 42 | 43 | self.upconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, 44 | stride=self.up_scale, padding=self.padding) 45 | 46 | self.upconv.weight.data.copy_(torch.from_numpy(kernel)) 47 | self.upconv.bias.data.fill_(0) 48 | self.upconv.weight.requires_grad = False 49 | self.upconv.bias.requires_grad = False 50 | 51 | def forward(self, x): 52 | return self.upconv(x) 53 | -------------------------------------------------------------------------------- /models/ops/context_block.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creates a GCB Model as defined in: 3 | Yue Cao, Jiarui Xu, Stephen Lin, Fangyun Wei, Han Hu. (2019 Arxiv). 4 | GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond. 5 | Copyright (c) Yang Lu, 2019 6 | """ 7 | import torch 8 | from torch import nn 9 | 10 | 11 | def constant_init(module, val, bias=0): 12 | nn.init.constant_(module.weight, val) 13 | if hasattr(module, 'bias') and module.bias is not None: 14 | nn.init.constant_(module.bias, bias) 15 | 16 | 17 | def last_zero_init(m): 18 | if isinstance(m, nn.Sequential): 19 | constant_init(m[-1], val=0) 20 | else: 21 | constant_init(m, val=0) 22 | 23 | 24 | class GlobalContextBlock(nn.Module): 25 | def __init__(self, inplanes, innerplanse, pooling_type='att', fusion_types=('channel_add', )): 26 | super(GlobalContextBlock, self).__init__() 27 | assert pooling_type in ['avg', 'att'] 28 | assert isinstance(fusion_types, (list, tuple)) 29 | valid_fusion_types = ['channel_add', 'channel_mul'] 30 | assert all([f in valid_fusion_types for f in fusion_types]) 31 | assert len(fusion_types) > 0, 'at least one fusion should be used' 32 | self.inplanes = inplanes 33 | self.innerplanse = innerplanse 34 | self.pooling_type = pooling_type 35 | self.fusion_types = fusion_types 36 | if pooling_type == 'att': 37 | self.conv_mask = nn.Conv2d(inplanes, 1, kernel_size=1) 38 | self.softmax = nn.Softmax(dim=2) 39 | else: 40 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 41 | if 'channel_add' in fusion_types: 42 | self.channel_add_conv = nn.Sequential( 43 | nn.Conv2d(self.inplanes, self.innerplanse, kernel_size=1), 44 | nn.LayerNorm([self.innerplanse, 1, 1]), 45 | nn.ReLU(inplace=True), 46 | nn.Conv2d(self.innerplanse, self.inplanes, kernel_size=1) 47 | ) 48 | else: 49 | self.channel_add_conv = None 50 | if 'channel_mul' in fusion_types: 51 | self.channel_mul_conv = nn.Sequential( 52 | nn.Conv2d(self.inplanes, self.innerplanse, kernel_size=1), 53 | nn.LayerNorm([self.innerplanse, 1, 1]), 54 | nn.ReLU(inplace=True), 55 | 
nn.Conv2d(self.innerplanse, self.inplanes, kernel_size=1) 56 | ) 57 | else: 58 | self.channel_mul_conv = None 59 | self.reset_parameters() 60 | 61 | def reset_parameters(self): 62 | if self.pooling_type == 'att': 63 | nn.init.kaiming_normal_(self.conv_mask.weight, mode='fan_in', nonlinearity='relu') 64 | self.conv_mask.inited = True 65 | 66 | if self.channel_add_conv is not None: 67 | last_zero_init(self.channel_add_conv) 68 | if self.channel_mul_conv is not None: 69 | last_zero_init(self.channel_mul_conv) 70 | 71 | def spatial_pool(self, x): 72 | batch, channel, height, width = x.size() 73 | if self.pooling_type == 'att': 74 | input_x = x 75 | # [N, C, H * W] 76 | input_x = input_x.view(batch, channel, height * width) 77 | # [N, 1, C, H * W] 78 | input_x = input_x.unsqueeze(1) 79 | # [N, 1, H, W] 80 | context_mask = self.conv_mask(x) 81 | # [N, 1, H * W] 82 | context_mask = context_mask.view(batch, 1, height * width) 83 | # [N, 1, H * W] 84 | context_mask = self.softmax(context_mask) 85 | # [N, 1, H * W, 1] 86 | context_mask = context_mask.unsqueeze(-1) 87 | # [N, 1, C, 1] 88 | context = torch.matmul(input_x, context_mask) 89 | # [N, C, 1, 1] 90 | context = context.view(batch, channel, 1, 1) 91 | else: 92 | # [N, C, 1, 1] 93 | context = self.avg_pool(x) 94 | 95 | return context 96 | 97 | def forward(self, x): 98 | # [N, C, 1, 1] 99 | context = self.spatial_pool(x) 100 | 101 | out = x 102 | if self.channel_mul_conv is not None: 103 | # [N, C, 1, 1] 104 | channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) 105 | out = out * channel_mul_term 106 | if self.channel_add_conv is not None: 107 | # [N, C, 1, 1] 108 | channel_add_term = self.channel_add_conv(context) 109 | out = out + channel_add_term 110 | 111 | return out 112 | -------------------------------------------------------------------------------- /models/ops/conv2d_samepadding.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | 7 | class Conv2dSamePadding(nn.Conv2d): 8 | """ 2D Convolutions like TensorFlow """ 9 | 10 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): 11 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 12 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 13 | 14 | def forward(self, x): 15 | ih, iw = x.size()[-2:] 16 | kh, kw = self.weight.size()[-2:] 17 | sh, sw = self.stride 18 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 19 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 20 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 21 | if pad_h > 0 or pad_w > 0: 22 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) 23 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 24 | -------------------------------------------------------------------------------- /models/ops/conv2d_ws.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | 5 | class Conv2dWS(nn.Conv2d): 6 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 7 | padding=0, dilation=1, groups=1, bias=True): 8 | super(Conv2dWS, self).__init__(in_channels, out_channels, kernel_size, stride, 9 | padding, dilation, groups, bias) 10 | 11 | def 
forward(self, x): 12 | # return super(Conv2d, self).forward(x) 13 | weight = self.weight 14 | weight_mean = weight.mean(dim=1, keepdim=True).mean(dim=2, keepdim=True).mean(dim=3, keepdim=True) 15 | weight = weight - weight_mean 16 | std = weight.view(weight.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5 17 | weight = weight / std.expand_as(weight) 18 | return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 19 | -------------------------------------------------------------------------------- /models/ops/csrc/PoolPointsInterp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef WITH_CUDA 4 | #include "cuda/vision.h" 5 | #endif 6 | 7 | // Interface for Python 8 | at::Tensor PoolPointsInterp_forward(const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale) { 11 | if (input.type().is_cuda()) { 12 | #ifdef WITH_CUDA 13 | return PoolPointsInterp_forward_cuda(input, rois, spatial_scale); 14 | #else 15 | AT_ERROR("Not compiled with GPU support"); 16 | #endif 17 | } 18 | } 19 | 20 | at::Tensor PoolPointsInterp_backward(const at::Tensor& grad, 21 | const at::Tensor& rois, 22 | const float spatial_scale, 23 | const int batch_size, 24 | const int channels, 25 | const int height, 26 | const int width) { 27 | if (grad.type().is_cuda()) { 28 | #ifdef WITH_CUDA 29 | return PoolPointsInterp_backward_cuda(grad, rois, spatial_scale, batch_size, channels, height, width); 30 | #else 31 | AT_ERROR("Not compiled with GPU support"); 32 | #endif 33 | } 34 | AT_ERROR("Not implemented on the CPU"); 35 | } 36 | 37 | -------------------------------------------------------------------------------- /models/ops/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio, 17 | bool aligned) { 18 | if (input.type().is_cuda()) { 19 | #ifdef WITH_CUDA 20 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned); 26 | } 27 | 28 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 29 | const at::Tensor& rois, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width, 37 | const int sampling_ratio, 38 | bool aligned) { 39 | if (grad.type().is_cuda()) { 40 | #ifdef WITH_CUDA 41 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio, aligned); 42 | #else 43 | AT_ERROR("Not compiled with GPU support"); 44 | #endif 45 | } 46 | AT_ERROR("Not implemented on the CPU"); 47 | } 48 | 49 | -------------------------------------------------------------------------------- /models/ops/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /models/ops/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef 
WITH_CUDA 18 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /models/ops/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /models/ops/csrc/cpu/vision.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio, 12 | bool aligned); 13 | 14 | 15 | at::Tensor nms_cpu(const at::Tensor& dets, 16 | const at::Tensor& scores, 17 | const float threshold); 18 | -------------------------------------------------------------------------------- /models/ops/csrc/cuda/deform_pool_cuda.cu: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | void DeformablePSROIPoolForward( 20 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 21 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 22 | const int height, const int width, const int num_bbox, 23 | const int channels_trans, const int no_trans, const float spatial_scale, 24 | const int output_dim, const int group_size, const int pooled_size, 25 | const int part_size, const int sample_per_part, const float trans_std); 26 | 27 | void DeformablePSROIPoolBackwardAcc( 28 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 29 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 30 | at::Tensor trans_grad, const int batch, const int channels, 31 | const int height, const int width, const int num_bbox, 32 | const int channels_trans, const int no_trans, const float spatial_scale, 33 | const int output_dim, const int group_size, const int pooled_size, 34 | const int part_size, const int sample_per_part, const float trans_std); 35 | 36 | void deform_psroi_pooling_cuda_forward( 37 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 38 | at::Tensor top_count, const int no_trans, const float spatial_scale, 39 | const int output_dim, const int group_size, const int pooled_size, 40 | const int part_size, const int sample_per_part, const float trans_std) 41 | { 42 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 43 | 44 | const int batch = input.size(0); 45 | const int channels = input.size(1); 46 | const int height = input.size(2); 47 | const int width = input.size(3); 48 | const int channels_trans = no_trans ? 
2 : trans.size(1); 49 | 50 | const int num_bbox = bbox.size(0); 51 | if (num_bbox != out.size(0)) 52 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 53 | out.size(0), num_bbox); 54 | 55 | DeformablePSROIPoolForward( 56 | input, bbox, trans, out, top_count, batch, channels, height, width, 57 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 58 | pooled_size, part_size, sample_per_part, trans_std); 59 | } 60 | 61 | void deform_psroi_pooling_cuda_backward( 62 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 63 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 64 | const int no_trans, const float spatial_scale, const int output_dim, 65 | const int group_size, const int pooled_size, const int part_size, 66 | const int sample_per_part, const float trans_std) 67 | { 68 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 69 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 70 | 71 | const int batch = input.size(0); 72 | const int channels = input.size(1); 73 | const int height = input.size(2); 74 | const int width = input.size(3); 75 | const int channels_trans = no_trans ? 2 : trans.size(1); 76 | 77 | const int num_bbox = bbox.size(0); 78 | if (num_bbox != out_grad.size(0)) 79 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 80 | out_grad.size(0), num_bbox); 81 | 82 | DeformablePSROIPoolBackwardAcc( 83 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 84 | channels, height, width, num_bbox, channels_trans, no_trans, 85 | spatial_scale, output_dim, group_size, pooled_size, part_size, 86 | sample_per_part, trans_std); 87 | } 88 | -------------------------------------------------------------------------------- /models/ops/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | const int sample_per_part, 56 | const float trans_std) 57 | { 58 | if (input.type().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | -------------------------------------------------------------------------------- /models/ops/csrc/ml_nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor ml_nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const at::Tensor& labels, 13 | const float threshold) { 14 | 15 | if (dets.type().is_cuda()) { 16 | #ifdef WITH_CUDA 17 | // TODO raise error if not compiled with CUDA 18 | if (dets.numel() == 0) 19 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 20 | auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1); 21 | return ml_nms_cuda(b, threshold); 22 | #else 23 | AT_ERROR("Not compiled with GPU support"); 24 | #endif 25 | } 26 | AT_ERROR("CPU version not implemented"); 27 | } 28 | -------------------------------------------------------------------------------- /models/ops/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /models/ops/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | #include "nms.h" 2 | #include "ml_nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "SigmoidFocalLoss.h" 6 | #include "deform_conv.h" 7 | #include "deform_pool.h" 8 | #include "PoolPointsInterp.h" 9 | 10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 11 | m.def("nms", &nms, "non-maximum suppression"); 12 | m.def("ml_nms", &ml_nms, "multi-label non-maximum suppression"); 13 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 14 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 15 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 16 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 17 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 18 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 19 | m.def("pool_points_interp_forward", &PoolPointsInterp_forward, "PoolPointsInterp_forward"); 20 | m.def("pool_points_interp_backward", &PoolPointsInterp_backward, "PoolPointsInterp_backward"); 21 | // dcn-v2 22 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 23 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 24 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 25 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 26 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 27 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 28 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 29 | } 30 | -------------------------------------------------------------------------------- /models/ops/cython_bbox.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_overlaps( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ua = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) + 70 | box_area - iw * ih 71 | ) 72 | overlaps[n, k] = iw * ih / ua 73 | return overlaps 74 | -------------------------------------------------------------------------------- /models/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn) 3 | # -------------------------------------------------------------------------------- /models/ops/dcn/deform_pool_func.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from models.ops import _C 6 | 7 | 8 | class DeformRoIPoolingFunction(Function): 9 | 10 | @staticmethod 11 | def forward( 12 | ctx, 13 | data, 14 | rois, 15 | offset, 16 | spatial_scale, 17 | out_size, 18 | out_channels, 19 | no_trans, 20 | group_size=1, 21 | part_size=None, 22 | sample_per_part=4, 23 | trans_std=.0 24 | ): 25 | ctx.spatial_scale = spatial_scale 26 | ctx.out_size = out_size 27 | ctx.out_channels = out_channels 28 | ctx.no_trans = no_trans 29 | ctx.group_size = group_size 30 | ctx.part_size = out_size if part_size is None else part_size 31 | ctx.sample_per_part = sample_per_part 32 | ctx.trans_std = trans_std 33 | 34 | assert 0.0 <= ctx.trans_std <= 1.0 35 | if not data.is_cuda: 36 | raise NotImplementedError 37 | 38 | n = rois.shape[0] 39 | output = data.new_empty(n, out_channels, out_size, out_size) 40 | output_count = data.new_empty(n, out_channels, out_size, out_size) 41 | _C.deform_psroi_pooling_forward( 42 | data, 43 | rois, 44 | offset, 45 | output, 46 | output_count, 47 | ctx.no_trans, 48 | 
ctx.spatial_scale, 49 | ctx.out_channels, 50 | ctx.group_size, 51 | ctx.out_size, 52 | ctx.part_size, 53 | ctx.sample_per_part, 54 | ctx.trans_std 55 | ) 56 | 57 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 58 | ctx.save_for_backward(data, rois, offset) 59 | ctx.output_count = output_count 60 | 61 | return output 62 | 63 | @staticmethod 64 | @once_differentiable 65 | def backward(ctx, grad_output): 66 | if not grad_output.is_cuda: 67 | raise NotImplementedError 68 | 69 | data, rois, offset = ctx.saved_tensors 70 | output_count = ctx.output_count 71 | grad_input = torch.zeros_like(data) 72 | grad_rois = None 73 | grad_offset = torch.zeros_like(offset) 74 | 75 | _C.deform_psroi_pooling_backward( 76 | grad_output, 77 | data, 78 | rois, 79 | offset, 80 | output_count, 81 | grad_input, 82 | grad_offset, 83 | ctx.no_trans, 84 | ctx.spatial_scale, 85 | ctx.out_channels, 86 | ctx.group_size, 87 | ctx.out_size, 88 | ctx.part_size, 89 | ctx.sample_per_part, 90 | ctx.trans_std 91 | ) 92 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None) 93 | 94 | 95 | deform_roi_pooling = DeformRoIPoolingFunction.apply 96 | -------------------------------------------------------------------------------- /models/ops/dropblock.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | 5 | 6 | class DropBlock2D(nn.Module): 7 | r"""Randomly zeroes spatial blocks of the input tensor. 8 | As described in the paper 9 | `DropBlock: A regularization method for convolutional networks`_ , 10 | dropping whole blocks of feature map allows to remove semantic 11 | information as compared to regular dropout. 12 | Args: 13 | keep_prob (float, optional): probability of an element to be kept. 14 | Authors recommend to linearly decrease this value from 1 to desired 15 | value. 16 | block_size (int, optional): size of the block. Block size in paper 17 | usually equals last feature map dimensions. 18 | Shape: 19 | - Input: :math:`(N, C, H, W)` 20 | - Output: :math:`(N, C, H, W)` (same shape as input) 21 | .. _DropBlock: A regularization method for convolutional networks: 22 | https://arxiv.org/abs/1810.12890 23 | """ 24 | 25 | def __init__(self, keep_prob=0.9, block_size=7): 26 | super(DropBlock2D, self).__init__() 27 | self.keep_prob = keep_prob 28 | self.block_size = block_size 29 | 30 | def forward(self, input): 31 | if not self.training or self.keep_prob == 1: 32 | return input 33 | gamma = (1. - self.keep_prob) / self.block_size ** 2 34 | for sh in input.shape[2:]: 35 | gamma *= sh / (sh - self.block_size + 1) 36 | M = torch.bernoulli(torch.ones_like(input) * gamma) 37 | Msum = F.conv2d(M, 38 | torch.ones((input.shape[1], 1, self.block_size, self.block_size)).to(device=input.device, 39 | dtype=input.dtype), 40 | padding=self.block_size // 2, 41 | groups=input.shape[1]) 42 | torch.set_printoptions(threshold=5000) 43 | mask = (Msum < 1).to(device=input.device, dtype=input.dtype) 44 | return input * mask * mask.numel() /mask.sum() #TODO input * mask * self.keep_prob ? 
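# A minimal usage sketch (not part of the original file; shapes are illustrative only).
# DropBlock2D is an identity in eval mode or when keep_prob == 1, so it can stay in the
# model for inference.
#
#   drop = DropBlock2D(keep_prob=0.9, block_size=7)
#   drop.train()
#   feat = torch.randn(2, 256, 56, 56)   # hypothetical NCHW feature map
#   out = drop(feat)                     # same shape; contiguous blocks zeroed and rescaled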
45 | 46 | -------------------------------------------------------------------------------- /models/ops/iou_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class IOULoss(nn.Module): 6 | def __init__(self, loc_loss_type): 7 | super(IOULoss, self).__init__() 8 | self.loc_loss_type = loc_loss_type 9 | 10 | def forward(self, pred, target, weight=None): 11 | pred_left = pred[:, 0] 12 | pred_top = pred[:, 1] 13 | pred_right = pred[:, 2] 14 | pred_bottom = pred[:, 3] 15 | 16 | target_left = target[:, 0] 17 | target_top = target[:, 1] 18 | target_right = target[:, 2] 19 | target_bottom = target[:, 3] 20 | 21 | target_area = (target_left + target_right) * (target_top + target_bottom) 22 | pred_area = (pred_left + pred_right) * (pred_top + pred_bottom) 23 | 24 | w_intersect = torch.min(pred_left, target_left) + torch.min(pred_right, target_right) 25 | h_intersect = torch.min(pred_bottom, target_bottom) + torch.min(pred_top, target_top) 26 | g_w_intersect = torch.max(pred_left, target_left) + torch.max(pred_right, target_right) 27 | g_h_intersect = torch.max(pred_bottom, target_bottom) + torch.max(pred_top, target_top) 28 | 29 | area_intersect = w_intersect * h_intersect 30 | area_union = target_area + pred_area - area_intersect 31 | ac_uion = g_w_intersect * g_h_intersect + 1e-7 32 | 33 | ious = (area_intersect + 1.0) / (area_union + 1.0) 34 | gious = ious - (ac_uion - area_union) / ac_uion 35 | 36 | if self.loc_loss_type == 'iou': 37 | losses = -torch.log(ious) 38 | elif self.loc_loss_type == 'liou': 39 | losses = 1 - ious 40 | elif self.loc_loss_type == 'giou': 41 | losses = 1 - gious 42 | else: 43 | raise NotImplementedError 44 | 45 | if weight is not None and weight.sum() > 0: 46 | return (losses * weight).sum() 47 | else: 48 | assert losses.numel() != 0 49 | return losses.sum() 50 | -------------------------------------------------------------------------------- /models/ops/l2_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def l2_loss(input, target): 5 | """ 6 | very similar to the smooth_l1_loss from pytorch, but with 7 | the extra beta parameter 8 | """ 9 | pos_inds = torch.nonzero(target > 0.0).squeeze(1) 10 | if pos_inds.shape[0] > 0: 11 | cond = torch.abs(input[pos_inds] - target[pos_inds]) 12 | loss = 0.5 * cond ** 2 / pos_inds.shape[0] 13 | else: 14 | loss = input * 0.0 15 | return loss.sum() 16 | -------------------------------------------------------------------------------- /models/ops/l2norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.init as init 4 | 5 | 6 | class L2Norm(nn.Module): 7 | def __init__(self, n_channels, scale): 8 | super(L2Norm, self).__init__() 9 | self.n_channels = n_channels 10 | self.gamma = scale or None 11 | self.eps = 1e-10 12 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 13 | self.reset_parameters() 14 | 15 | def reset_parameters(self): 16 | init.constant_(self.weight, self.gamma) 17 | 18 | def forward(self, x): 19 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps 20 | x = x / norm 21 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x 22 | return out 23 | -------------------------------------------------------------------------------- /models/ops/label_smoothing.py: -------------------------------------------------------------------------------- 
1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class LabelSmoothing(nn.Module): 6 | """ 7 | NLL loss with label smoothing. 8 | """ 9 | 10 | def __init__(self, smoothing=0.0): 11 | """ 12 | Constructor for the LabelSmoothing module. 13 | :param smoothing: label smoothing factor 14 | """ 15 | super(LabelSmoothing, self).__init__() 16 | self.confidence = 1.0 - smoothing 17 | self.smoothing = smoothing 18 | 19 | def forward(self, x, target): 20 | logprobs = torch.nn.functional.log_softmax(x, dim=-1) 21 | 22 | nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) 23 | nll_loss = nll_loss.squeeze(1) 24 | smooth_loss = -logprobs.mean(dim=-1) 25 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss 26 | return loss.mean() 27 | -------------------------------------------------------------------------------- /models/ops/lovasz_hinge_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from torch.autograd import Variable 5 | import torch.nn.functional as F 6 | 7 | 8 | def lovasz_grad(gt_sorted): 9 | """ 10 | Computes gradient of the Lovasz extension w.r.t sorted errors 11 | See Alg. 1 in paper 12 | """ 13 | p = len(gt_sorted) 14 | gts = gt_sorted.sum() 15 | intersection = gts - gt_sorted.float().cumsum(0) 16 | union = gts + (1 - gt_sorted).float().cumsum(0) 17 | jaccard = 1. - intersection / union 18 | if p > 1: # cover 1-pixel case 19 | jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] 20 | return jaccard 21 | 22 | 23 | class LovaszHinge(nn.Module): 24 | def __init__(self, reduction='mean'): 25 | super(LovaszHinge, self).__init__() 26 | self.reduction = reduction 27 | 28 | def flatten(self, input, target, mask=None): 29 | if mask is None: 30 | input_flatten = input.view(-1) 31 | target_flatten = target.view(-1) 32 | else: 33 | input_flatten = input[mask].view(-1) 34 | target_flatten = target[mask].view(-1) 35 | return input_flatten, target_flatten 36 | 37 | def lovasz_hinge_flat(self, logits, labels): 38 | """ 39 | Binary Lovasz hinge loss 40 | logits: [P] Variable, logits at each prediction (between -\infty and +\infty) 41 | labels: [P] Tensor, binary ground truth labels (0 or 1) 42 | ignore: label to ignore 43 | """ 44 | if len(labels) == 0: 45 | # only void pixels, the gradients should be 0 46 | return logits.sum() * 0. 47 | signs = 2. * labels.float() - 1. 48 | errors = (1. 
- logits * Variable(signs)) 49 | errors_sorted, perm = torch.sort(errors, dim=0, descending=True) 50 | perm = perm.data 51 | gt_sorted = labels[perm] 52 | grad = lovasz_grad(gt_sorted) 53 | loss = torch.dot(F.relu(errors_sorted), Variable(grad)) 54 | return loss 55 | 56 | def forward(self, inputs, targets, mask=None, act=False): 57 | losses = [] 58 | for id in range(len(inputs)): 59 | if mask is not None: 60 | input_flatten, target_flatten = self.flatten(inputs[id], targets[id], mask[id]) 61 | else: 62 | input_flatten, target_flatten = self.flatten(inputs[id], targets[id]) 63 | if act: 64 | # map [0, 1] to [-inf, inf] 65 | input_flatten = torch.log(input_flatten) - torch.log(1 - input_flatten) 66 | losses.append(self.lovasz_hinge_flat(input_flatten, target_flatten)) 67 | losses = torch.stack(losses).to(device=inputs.device) 68 | if self.reduction == "mean": 69 | losses = losses.mean() 70 | elif self.reduction == "sum": 71 | losses = losses.sum() 72 | 73 | return losses 74 | -------------------------------------------------------------------------------- /models/ops/misc.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch import nn 4 | from torch.nn.modules.utils import _ntuple 5 | 6 | 7 | class _NewEmptyTensorOp(torch.autograd.Function): 8 | @staticmethod 9 | def forward(ctx, x, new_shape): 10 | ctx.shape = x.shape 11 | return x.new_empty(new_shape) 12 | 13 | @staticmethod 14 | def backward(ctx, grad): 15 | shape = ctx.shape 16 | return _NewEmptyTensorOp.apply(grad, shape), None 17 | 18 | 19 | class Conv2d(torch.nn.Conv2d): 20 | def forward(self, x): 21 | if x.numel() > 0: 22 | return super(Conv2d, self).forward(x) 23 | # get output shape 24 | 25 | output_shape = [ 26 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 27 | for i, p, di, k, d in zip( 28 | x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride 29 | ) 30 | ] 31 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape 32 | return _NewEmptyTensorOp.apply(x, output_shape) 33 | 34 | 35 | class ConvTranspose2d(torch.nn.ConvTranspose2d): 36 | def forward(self, x): 37 | if x.numel() > 0: 38 | return super(ConvTranspose2d, self).forward(x) 39 | # get output shape 40 | 41 | output_shape = [ 42 | (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op 43 | for i, p, di, k, d, op in zip( 44 | x.shape[-2:], 45 | self.padding, 46 | self.dilation, 47 | self.kernel_size, 48 | self.stride, 49 | self.output_padding, 50 | ) 51 | ] 52 | output_shape = [x.shape[0], self.bias.shape[0]] + output_shape 53 | return _NewEmptyTensorOp.apply(x, output_shape) 54 | 55 | 56 | class BatchNorm2d(torch.nn.BatchNorm2d): 57 | def forward(self, x): 58 | if x.numel() > 0: 59 | return super(BatchNorm2d, self).forward(x) 60 | # get output shape 61 | output_shape = x.shape 62 | return _NewEmptyTensorOp.apply(x, output_shape) 63 | 64 | 65 | def interpolate(x, size=None, scale_factor=None, mode="nearest", align_corners=None): 66 | if x.numel() > 0: 67 | return torch.nn.functional.interpolate( 68 | x, size, scale_factor, mode, align_corners 69 | ) 70 | 71 | def _check_size_scale_factor(dim): 72 | if size is None and scale_factor is None: 73 | raise ValueError("either size or scale_factor should be defined") 74 | if size is not None and scale_factor is not None: 75 | raise ValueError("only one of size or scale_factor should be defined") 76 | if ( 77 | scale_factor is not None 78 | and isinstance(scale_factor, tuple) 79 | and len(scale_factor) != dim 80 | ): 81 | raise 
ValueError( 82 | "scale_factor shape must match input shape. " 83 | "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) 84 | ) 85 | 86 | def _output_size(dim): 87 | _check_size_scale_factor(dim) 88 | if size is not None: 89 | return size 90 | scale_factors = _ntuple(dim)(scale_factor) 91 | # math.floor might return float in py2.7 92 | return [ 93 | int(math.floor(x.size(i + 2) * scale_factors[i])) for i in range(dim) 94 | ] 95 | 96 | output_shape = tuple(_output_size(2)) 97 | output_shape = x.shape[:-2] + output_shape 98 | return _NewEmptyTensorOp.apply(x, output_shape) 99 | -------------------------------------------------------------------------------- /models/ops/mixture_batchnorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class H_Sigmoid(nn.Module): 7 | def forward(self, x): 8 | out = F.relu6(x + 3, inplace=True) / 6 9 | return out 10 | 11 | 12 | def make_norm(c, norm='bn', group=1, eps=1e-5): 13 | if norm == 'bn': 14 | return nn.BatchNorm2d(c, eps=eps) 15 | elif norm == 'gn': 16 | assert c % group == 0 17 | return nn.GroupNorm(group, c, eps=eps) 18 | elif norm == 'none': 19 | return None 20 | else: 21 | return nn.BatchNorm2d(c, eps=eps) 22 | 23 | 24 | class AttentionWeights(nn.Module): 25 | expansion = 2 26 | 27 | def __init__(self, num_channels, k, norm=None, groups=1, use_hsig=True): 28 | super(AttentionWeights, self).__init__() 29 | # num_channels *= 2 30 | self.k = k 31 | self.avgpool = nn.AdaptiveAvgPool2d(1) 32 | self.attention = nn.Sequential( 33 | nn.Conv2d(num_channels, k, 1, bias=False), 34 | make_norm(k, norm, groups), 35 | H_Sigmoid() if use_hsig else nn.Sigmoid() 36 | ) 37 | 38 | def forward(self, x): 39 | b, c, _, _ = x.size() 40 | y = self.avgpool(x) # .view(b, c) 41 | var = torch.var(x, dim=(2, 3)).view(b, c, 1, 1) 42 | y *= (var + 1e-3).rsqrt() 43 | # y = torch.cat((y, var), dim=1) 44 | return self.attention(y).view(b, self.k) 45 | 46 | 47 | # TODO: keep it to use FP32 always, need to figure out how to set it using apex ? 48 | class MixtureBatchNorm2d(nn.BatchNorm2d): 49 | def __init__(self, num_channels, k, eps=1e-5, momentum=0.1, track_running_stats=True): 50 | super(MixtureBatchNorm2d, self).__init__( 51 | num_channels, eps=eps, momentum=momentum, affine=False, track_running_stats=track_running_stats) 52 | self.k = k 53 | self.weight_ = nn.Parameter(torch.Tensor(k, num_channels)) 54 | self.bias_ = nn.Parameter(torch.Tensor(k, num_channels)) 55 | 56 | self.attention_weights = AttentionWeights(num_channels, k, norm='bn') 57 | 58 | self._init_params() 59 | 60 | def _init_params(self): 61 | nn.init.normal_(self.weight_, 1, 0.1) 62 | nn.init.normal_(self.bias_, 0, 0.1) 63 | 64 | def forward(self, x): 65 | output = super(MixtureBatchNorm2d, self).forward(x) 66 | size = output.size() 67 | y = self.attention_weights(x) # bxk # or use output as attention input 68 | 69 | weight = y @ self.weight_ # bxc 70 | bias = y @ self.bias_ # bxc 71 | weight = weight.unsqueeze(-1).unsqueeze(-1).expand(size) 72 | bias = bias.unsqueeze(-1).unsqueeze(-1).expand(size) 73 | 74 | return weight * output + bias 75 | 76 | 77 | # Modified on top of nn.GroupNorm 78 | # TODO: keep it to use FP32 always, need to figure out how to set it using apex ? 
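# MixtureGroupNorm mirrors MixtureBatchNorm2d above: it applies group normalization without
# affine parameters, then mixes k learned (weight_, bias_) pairs per sample via AttentionWeights.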
79 | class MixtureGroupNorm(nn.Module): 80 | __constants__ = ['num_groups', 'num_channels', 'k', 'eps', 'weight', 'bias'] 81 | 82 | def __init__(self, num_channels, num_groups, k, eps=1e-5): 83 | super(MixtureGroupNorm, self).__init__() 84 | self.num_groups = num_groups 85 | self.num_channels = num_channels 86 | self.k = k 87 | self.eps = eps 88 | self.affine = True 89 | self.weight_ = nn.Parameter(torch.Tensor(k, num_channels)) 90 | self.bias_ = nn.Parameter(torch.Tensor(k, num_channels)) 91 | self.register_parameter('weight', None) 92 | self.register_parameter('bias', None) 93 | 94 | self.attention_weights = AttentionWeights(num_channels, k, norm='gn', groups=1) 95 | 96 | self.reset_parameters() 97 | 98 | def reset_parameters(self): 99 | nn.init.normal_(self.weight_, 1, 0.1) 100 | nn.init.normal_(self.bias_, 0, 0.1) 101 | 102 | def forward(self, x): 103 | output = F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps) 104 | size = output.size() 105 | 106 | y = self.attention_weights(x) # TODO: use output as attention input 107 | 108 | weight = y @ self.weight_ 109 | bias = y @ self.bias_ 110 | 111 | weight = weight.unsqueeze(-1).unsqueeze(-1).expand(size) 112 | bias = bias.unsqueeze(-1).unsqueeze(-1).expand(size) 113 | 114 | return weight * output + bias 115 | 116 | def extra_repr(self): 117 | return '{num_groups}, {num_channels}, eps={eps}, ' \ 118 | 'affine={affine}'.format(**self.__dict__) 119 | -------------------------------------------------------------------------------- /models/ops/nms.py: -------------------------------------------------------------------------------- 1 | from models.ops import _C 2 | 3 | from apex import amp 4 | 5 | # Only valid with fp32 inputs - give AMP the hint 6 | nms = amp.float_function(_C.nms) 7 | ml_nms = amp.float_function(_C.ml_nms) 8 | 9 | # nms.__doc__ = """ 10 | # This function performs Non-maximum suppresion""" 11 | -------------------------------------------------------------------------------- /models/ops/pool_points_interp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from models.ops import _C 7 | 8 | from apex import amp 9 | 10 | 11 | class _PoolPointsInterp(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, spatial_scale): 14 | ctx.save_for_backward(roi) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output = _C.pool_points_interp_forward( 18 | input, roi, spatial_scale) 19 | return output 20 | 21 | @staticmethod 22 | @once_differentiable 23 | def backward(ctx, grad_output): 24 | rois, = ctx.saved_tensors 25 | spatial_scale = ctx.spatial_scale 26 | bs, ch, h, w = ctx.input_shape 27 | grad_input = _C.pool_points_interp_backward( 28 | grad_output, 29 | rois, 30 | spatial_scale, 31 | bs, 32 | ch, 33 | h, 34 | w, 35 | ) 36 | return grad_input, None, None 37 | 38 | 39 | pool_points_interp = _PoolPointsInterp.apply 40 | 41 | 42 | class PoolPointsInterp(nn.Module): 43 | def __init__(self, spatial_scale=1.0): 44 | super(PoolPointsInterp, self).__init__() 45 | self.spatial_scale = spatial_scale 46 | 47 | @amp.float_function 48 | def forward(self, input, rois): 49 | return pool_points_interp(input, rois, self.spatial_scale) 50 | 51 | def __repr__(self): 52 | tmpstr = self.__class__.__name__ + "(" 53 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 54 | tmpstr += ")" 55 | return tmpstr 56 | 
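# Usage sketch (tensor shapes and the rois layout are assumptions, not taken from this file):
# PoolPointsInterp wraps the _C.pool_points_interp_* kernels so that interpolating features
# at sampled points stays differentiable w.r.t. the input feature map.
#
#   ppi = PoolPointsInterp(spatial_scale=0.25)   # feature map at 1/4 input resolution
#   feats = torch.randn(2, 256, 100, 136)        # hypothetical NCHW FPN level
#   point_feats = ppi(feats, rois)               # rois encode the sampling points in the
#                                                # format expected by the CUDA kernel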
-------------------------------------------------------------------------------- /models/ops/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class Scale(nn.Module): 6 | def __init__(self, init_value=1.0): 7 | super(Scale, self).__init__() 8 | self.scale = nn.Parameter(torch.FloatTensor([init_value])) 9 | 10 | def forward(self, input): 11 | return input * self.scale 12 | -------------------------------------------------------------------------------- /models/ops/setup_rcnn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import glob 4 | import os 5 | 6 | import torch 7 | from setuptools import find_packages 8 | from setuptools import setup 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | from torch.utils.cpp_extension import CppExtension 11 | from torch.utils.cpp_extension import CUDAExtension 12 | 13 | requirements = ["torch", "torchvision"] 14 | 15 | 16 | def get_extensions(): 17 | this_dir = os.path.dirname(os.path.abspath(__file__)) 18 | extensions_dir = os.path.join(this_dir, "csrc") 19 | 20 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 21 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 22 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 23 | 24 | sources = main_file + source_cpu 25 | extension = CppExtension 26 | 27 | extra_compile_args = {"cxx": []} 28 | define_macros = [] 29 | 30 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 31 | extension = CUDAExtension 32 | sources += source_cuda 33 | define_macros += [("WITH_CUDA", None)] 34 | extra_compile_args["nvcc"] = [ 35 | "-DCUDA_HAS_FP16=1", 36 | "-D__CUDA_NO_HALF_OPERATORS__", 37 | "-D__CUDA_NO_HALF_CONVERSIONS__", 38 | "-D__CUDA_NO_HALF2_OPERATORS__", 39 | ] 40 | 41 | sources = [os.path.join(extensions_dir, s) for s in sources] 42 | 43 | include_dirs = [extensions_dir] 44 | 45 | ext_modules = [ 46 | extension( 47 | "_C", 48 | sources, 49 | include_dirs=include_dirs, 50 | define_macros=define_macros, 51 | extra_compile_args=extra_compile_args, 52 | ) 53 | ] 54 | 55 | return ext_modules 56 | 57 | 58 | setup( 59 | name="pet", 60 | ext_modules=get_extensions(), 61 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 62 | ) 63 | -------------------------------------------------------------------------------- /models/ops/setup_ssd.py: -------------------------------------------------------------------------------- 1 | from Cython.Build import cythonize 2 | from Cython.Distutils import build_ext 3 | from setuptools import Extension 4 | from setuptools import setup 5 | 6 | import numpy as np 7 | 8 | 9 | # Obtain the numpy include directory. This logic works across numpy versions. 
10 | try: 11 | numpy_include = np.get_include() 12 | except AttributeError: 13 | numpy_include = np.get_numpy_include() 14 | 15 | 16 | ext_modules = [ 17 | Extension( 18 | name='cython_bbox', 19 | sources=['cython_bbox.pyx'], 20 | extra_compile_args=['-Wno-cpp'], 21 | include_dirs=[numpy_include] 22 | ), 23 | Extension( 24 | name='cython_nms', 25 | sources=['cython_nms.pyx'], 26 | extra_compile_args=['-Wno-cpp'], 27 | include_dirs=[numpy_include] 28 | ) 29 | ] 30 | 31 | setup( 32 | name='pet', 33 | ext_modules=cythonize(ext_modules) 34 | ) 35 | -------------------------------------------------------------------------------- /models/ops/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from models.ops import _C 7 | 8 | 9 | # TODO: Use JIT to replace CUDA implementation in the future. 10 | class _SigmoidFocalLoss(Function): 11 | @staticmethod 12 | def forward(ctx, logits, targets, gamma, alpha): 13 | ctx.save_for_backward(logits, targets) 14 | num_classes = logits.shape[1] 15 | ctx.num_classes = num_classes 16 | ctx.gamma = gamma 17 | ctx.alpha = alpha 18 | 19 | losses = _C.sigmoid_focalloss_forward( 20 | logits, targets, num_classes, gamma, alpha 21 | ) 22 | return losses 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, d_loss): 27 | logits, targets = ctx.saved_tensors 28 | num_classes = ctx.num_classes 29 | gamma = ctx.gamma 30 | alpha = ctx.alpha 31 | d_loss = d_loss.contiguous() 32 | d_logits = _C.sigmoid_focalloss_backward( 33 | logits, targets, d_loss, num_classes, gamma, alpha 34 | ) 35 | return d_logits, None, None, None, None 36 | 37 | 38 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 39 | 40 | 41 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha): 42 | num_classes = logits.shape[1] 43 | gamma = gamma[0] 44 | alpha = alpha[0] 45 | dtype = targets.dtype 46 | device = targets.device 47 | class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0) 48 | 49 | t = targets.unsqueeze(1) 50 | p = torch.sigmoid(logits) 51 | term1 = (1 - p) ** gamma * torch.log(p) 52 | term2 = p ** gamma * torch.log(1 - p) 53 | return -(t == class_range).float() * term1 * alpha - ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 54 | 55 | 56 | class SigmoidFocalLoss(nn.Module): 57 | def __init__(self, gamma, alpha): 58 | super(SigmoidFocalLoss, self).__init__() 59 | self.gamma = gamma 60 | self.alpha = alpha 61 | 62 | def forward(self, logits, targets): 63 | device = logits.device 64 | if logits.is_cuda: 65 | loss_func = sigmoid_focal_loss_cuda 66 | else: 67 | loss_func = sigmoid_focal_loss_cpu 68 | 69 | loss = loss_func(logits, targets, self.gamma, self.alpha) 70 | return loss.sum() 71 | 72 | def __repr__(self): 73 | tmpstr = self.__class__.__name__ + "(" 74 | tmpstr += "gamma=" + str(self.gamma) 75 | tmpstr += ", alpha=" + str(self.alpha) 76 | tmpstr += ")" 77 | return tmpstr 78 | -------------------------------------------------------------------------------- /models/ops/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def smooth_l1_loss(input, target, beta=1. 
/ 9, size_average=True): 5 | """ 6 | very similar to the smooth_l1_loss from pytorch, but with 7 | the extra beta parameter 8 | 9 | Modified according to detectron2's fvcore, 10 | refer to https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py 11 | """ 12 | if beta < 1e-5: 13 | # if beta == 0, then torch.where will result in nan gradients when 14 | # the chain rule is applied due to pytorch implementation details 15 | # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of 16 | # zeros, rather than "no gradient"). To avoid this issue, we define 17 | # small values of beta to be exactly l1 loss. 18 | loss = torch.abs(input - target) 19 | else: 20 | n = torch.abs(input - target) 21 | cond = n < beta 22 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 23 | 24 | if size_average: 25 | return loss.mean() 26 | return loss.sum() 27 | 28 | 29 | def smooth_l1_loss_LW(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, beta=1.0): 30 | """ 31 | SmoothL1(x) = 0.5 * x^2 / beta if |x| < beta 32 | |x| - 0.5 * beta otherwise. 33 | 1 / N * sum_i alpha_out[i] * SmoothL1(alpha_in[i] * (y_hat[i] - y[i])). 34 | N is the number of batch elements in the input predictions 35 | """ 36 | box_diff = bbox_pred - bbox_targets 37 | in_box_diff = bbox_inside_weights * box_diff 38 | abs_in_box_diff = torch.abs(in_box_diff) 39 | smoothL1_sign = (abs_in_box_diff < beta).detach().float() 40 | in_loss_box = smoothL1_sign * 0.5 * torch.pow(in_box_diff, 2) / beta + \ 41 | (1 - smoothL1_sign) * (abs_in_box_diff - (0.5 * beta)) 42 | out_loss_box = bbox_outside_weights * in_loss_box 43 | loss_box = out_loss_box 44 | N = loss_box.size(0) # batch size 45 | loss_box = loss_box.view(-1).sum(0) / N 46 | return loss_box -------------------------------------------------------------------------------- /models/ops/squeeze_excitation.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class SeConv2d(nn.Module): 5 | def __init__(self, inplanes, innerplanse, activation=nn.ReLU): 6 | super(SeConv2d, self).__init__() 7 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 8 | self.conv = nn.Sequential( 9 | nn.Conv2d(inplanes, innerplanse, kernel_size=1), 10 | activation(), 11 | nn.Conv2d(innerplanse, inplanes, kernel_size=1), 12 | nn.Sigmoid() 13 | ) 14 | self.reset_parameters() 15 | 16 | def reset_parameters(self): 17 | # weight initialization 18 | for m in self.modules(): 19 | if isinstance(m, nn.Conv2d): 20 | nn.init.constant_(m.weight, 0) 21 | if m.bias is not None: 22 | nn.init.zeros_(m.bias) 23 | 24 | def forward(self, x): 25 | n, c, _, _ = x.size() 26 | y = self.avg_pool(x) 27 | y = self.conv(y) 28 | return x * y 29 | 30 | 31 | class GDWSe2d(nn.Module): 32 | def __init__(self, inplanes, kernel=3, reduction=16, with_padding=False): 33 | super(GDWSe2d, self).__init__() 34 | if with_padding: 35 | padding = kernel // 2 36 | else: 37 | padding = 0 38 | 39 | self.globle_dw = nn.Conv2d(inplanes, inplanes, kernel_size=kernel, padding=padding, stride=1, 40 | groups=inplanes, bias=False) 41 | self.bn = nn.BatchNorm2d(inplanes) 42 | self.relu = nn.ReLU(inplace=True) 43 | 44 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 45 | self.fc = nn.Sequential( 46 | nn.Linear(inplanes, inplanes // reduction), 47 | nn.ReLU(inplace=True), 48 | nn.Linear(inplanes // reduction, inplanes), 49 | nn.Sigmoid() 50 | ) 51 | 52 | self._init_weights() 53 | 54 | def _init_weights(self): 55 | # weight initialization 56 | for m in self.modules(): 57 | if 
isinstance(m, nn.Conv2d): 58 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 59 | if m.bias is not None: 60 | nn.init.zeros_(m.bias) 61 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 62 | nn.init.constant_(m.weight, 1) 63 | nn.init.constant_(m.bias, 0) 64 | elif isinstance(m, nn.Linear): 65 | nn.init.normal_(m.weight, 0, 0.01) 66 | nn.init.constant_(m.bias, 0) 67 | 68 | def forward(self, x): 69 | y = self.globle_dw(x) 70 | y = self.bn(y) 71 | y = self.relu(y) 72 | 73 | n, c, _, _ = x.size() 74 | y = self.avg_pool(y).view(n, c) 75 | y = self.fc(y).view(n, c, 1, 1) 76 | return x * y.expand_as(x) 77 | -------------------------------------------------------------------------------- /rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/core/__init__.py -------------------------------------------------------------------------------- /rcnn/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .transform import build_transforms 2 | from .dataset import build_dataset 3 | from .dataset import make_train_data_loader 4 | from .dataset import make_test_data_loader 5 | from .evaluation import evaluation 6 | from .evaluation import post_processing -------------------------------------------------------------------------------- /rcnn/datasets/dataset_catalog.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from utils.data.dataset_catalog import COMMON_DATASETS 4 | 5 | # Root directory of project 6 | ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..')) 7 | 8 | # Path to data dir 9 | _DATA_DIR = osp.abspath(osp.join(ROOT_DIR, 'data')) 10 | 11 | # Required dataset entry keys 12 | _IM_DIR = 'image_directory' 13 | _ANN_FN = 'annotation_file' 14 | 15 | # Available datasets 16 | _DATASETS = { 17 | } 18 | _DATASETS.update(COMMON_DATASETS) 19 | 20 | 21 | def datasets(): 22 | """Retrieve the list of available dataset names.""" 23 | return _DATASETS.keys() 24 | 25 | 26 | def contains(name): 27 | """Determine if the dataset is in the catalog.""" 28 | return name in _DATASETS.keys() 29 | 30 | 31 | def get_im_dir(name): 32 | """Retrieve the image directory for the dataset.""" 33 | return _DATASETS[name][_IM_DIR] 34 | 35 | 36 | def get_ann_fn(name): 37 | """Retrieve the annotation file for the dataset.""" 38 | return _DATASETS[name][_ANN_FN] 39 | -------------------------------------------------------------------------------- /rcnn/datasets/transform.py: -------------------------------------------------------------------------------- 1 | from utils.data import transforms as T 2 | 3 | from rcnn.core.config import cfg 4 | 5 | 6 | def build_transforms(is_train=True): 7 | if is_train: 8 | min_size = cfg.TRAIN.SCALES 9 | max_size = cfg.TRAIN.MAX_SIZE 10 | flip_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 11 | brightness = cfg.TRAIN.BRIGHTNESS 12 | contrast = cfg.TRAIN.CONTRAST 13 | saturation = cfg.TRAIN.SATURATION 14 | hue = cfg.TRAIN.HUE 15 | left_right = cfg.TRAIN.LEFT_RIGHT 16 | 17 | # for force resize 18 | force_test_scale = [-1, -1] 
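# [-1, -1] appears to act as the 'disabled' placeholder during training;
# the test branch below reads cfg.TEST.FORCE_TEST_SCALE instead.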
19 | scale_ratios = cfg.TRAIN.RANDOM_CROP.SCALE_RATIOS 20 | 21 | # for random crop 22 | preprocess_type = cfg.TRAIN.PREPROCESS_TYPE 23 | 24 | crop_sizes = cfg.TRAIN.RANDOM_CROP.CROP_SCALES 25 | crop_iou_ths = cfg.TRAIN.RANDOM_CROP.IOU_THS 26 | pad_pixel = cfg.TRAIN.RANDOM_CROP.PAD_PIXEL 27 | pad_pixel = (cfg.PIXEL_MEANS if len(pad_pixel) < 3 else pad_pixel) 28 | else: 29 | min_size = cfg.TEST.SCALE 30 | max_size = cfg.TEST.MAX_SIZE 31 | flip_prob = 0 32 | brightness = 0.0 33 | contrast = 0.0 34 | saturation = 0.0 35 | hue = 0.0 36 | left_right = () 37 | 38 | # for force resize 39 | force_test_scale = cfg.TEST.FORCE_TEST_SCALE 40 | scale_ratios = () 41 | 42 | # for random crop 43 | preprocess_type = "none" 44 | 45 | crop_sizes = () 46 | pad_pixel = () 47 | crop_iou_ths = () 48 | 49 | to_bgr255 = cfg.TO_BGR255 50 | normalize_transform = T.Normalize( 51 | mean=cfg.PIXEL_MEANS, std=cfg.PIXEL_STDS, to_bgr255=to_bgr255 52 | ) 53 | 54 | color_jitter = T.ColorJitter( 55 | brightness=brightness, 56 | contrast=contrast, 57 | saturation=saturation, 58 | hue=hue, 59 | ) 60 | 61 | transform = T.Compose( 62 | [ 63 | color_jitter, 64 | T.Resize(min_size, max_size, preprocess_type, scale_ratios, force_test_scale), 65 | T.RandomCrop(preprocess_type, crop_sizes, pad_pixel, crop_iou_ths), 66 | T.RandomHorizontalFlip(flip_prob, left_right), 67 | T.ToTensor(), 68 | normalize_transform, 69 | ] 70 | ) 71 | return transform 72 | -------------------------------------------------------------------------------- /rcnn/modeling/backbone/MobileNet_v2.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | 5 | import models.imagenet.mobilenet_v2 as mv2 6 | import models.ops as ops 7 | from models.imagenet.utils import make_divisible 8 | from utils.net import freeze_params, make_norm 9 | from rcnn.modeling import registry 10 | from rcnn.core.config import cfg 11 | 12 | 13 | class MobileNetV2(mv2.MobileNetV2): 14 | def __init__(self, norm='bn', activation=nn.ReLU6, stride=32): 15 | """ Constructor 16 | """ 17 | super(MobileNetV2, self).__init__() 18 | block = mv2.LinearBottleneck 19 | self.use_se = cfg.BACKBONE.MV2.USE_SE 20 | self.widen_factor = cfg.BACKBONE.MV2.WIDEN_FACTOR 21 | self.norm = norm 22 | self.activation_type = activation 23 | try: 24 | self.activation = activation(inplace=True) 25 | except: 26 | self.activation = activation() 27 | self.stride = stride 28 | 29 | layers_cfg = mv2.model_se(mv2.MV2_CFG['A']) if self.use_se else mv2.MV2_CFG['A'] 30 | num_of_channels = [lc[-1][1] for lc in layers_cfg[1:-1]] 31 | self.channels = [make_divisible(ch * self.widen_factor, 8) for ch in num_of_channels] 32 | self.layers = [len(lc) for lc in layers_cfg[2:-1]] 33 | 34 | self.inplanes = make_divisible(layers_cfg[0][0][1] * self.widen_factor, 8) 35 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=layers_cfg[0][0][0], stride=layers_cfg[0][0][4], 36 | padding=layers_cfg[0][0][0] // 2, bias=False) 37 | self.bn1 = make_norm(self.inplanes, norm=self.norm) 38 | 39 | self.layer0 = self._make_layer(block, layers_cfg[1], dilation=1) 40 | self.layer1 = self._make_layer(block, layers_cfg[2], dilation=1) 41 | self.layer2 = self._make_layer(block, layers_cfg[3], dilation=1) 42 | self.layer3 = self._make_layer(block, layers_cfg[4], dilation=1) 43 | self.layer4 = self._make_layer(block, layers_cfg[5], dilation=1) 44 | 45 | self.spatial_scale = [1 / 4., 1 / 8., 1 / 16., 1 / 32.] 
46 | self.dim_out = self.stage_out_dim[1:int(math.log(self.stride, 2))] 47 | 48 | del self.conv_out 49 | del self.bn_out 50 | del self.avgpool 51 | del self.fc 52 | self._init_weights() 53 | self._init_modules() 54 | 55 | def _init_modules(self): 56 | assert cfg.BACKBONE.MV2.FREEZE_AT in [0, 2, 3, 4, 5] # cfg.BACKBONE.MV2.FREEZE_AT: 2 57 | assert cfg.BACKBONE.MV2.FREEZE_AT <= len(self.layers) + 1 58 | if cfg.BACKBONE.MV2.FREEZE_AT > 0: 59 | freeze_params(getattr(self, 'conv1')) 60 | freeze_params(getattr(self, 'bn1')) 61 | for i in range(0, cfg.BACKBONE.MV2.FREEZE_AT): 62 | freeze_params(getattr(self, 'layer%d' % i)) 63 | # Freeze all bn (affine) layers !!! 64 | self.apply(lambda m: freeze_params(m) if isinstance(m, ops.AffineChannel2d) else None) 65 | 66 | def train(self, mode=True): 67 | # Override train mode 68 | self.training = mode 69 | if cfg.BACKBONE.MV2.FREEZE_AT < 1: 70 | getattr(self, 'conv1').train(mode) 71 | getattr(self, 'bn1').train(mode) 72 | for i in range(cfg.BACKBONE.MV2.FREEZE_AT, len(self.layers) + 1): 73 | getattr(self, 'layer%d' % i).train(mode) 74 | 75 | def forward(self, x): 76 | x = self.conv1(x) 77 | x = self.bn1(x) 78 | x = self.activation(x) 79 | 80 | x = self.layer0(x) 81 | x2 = self.layer1(x) 82 | x3 = self.layer2(x2) 83 | x4 = self.layer3(x3) 84 | x5 = self.layer4(x4) 85 | 86 | return [x2, x3, x4, x5] 87 | 88 | 89 | # ---------------------------------------------------------------------------- # 90 | # MobileNetV2 Conv Body 91 | # ---------------------------------------------------------------------------- # 92 | @registry.BACKBONES.register("mobilenet_v2") 93 | def mobilenet_v2(): 94 | model = MobileNetV2() 95 | return model 96 | -------------------------------------------------------------------------------- /rcnn/modeling/backbone/MobileNet_v3.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | import models.imagenet.mobilenet_v3 as mv3 7 | import models.ops as ops 8 | from models.imagenet.utils import make_divisible, convert_conv2convsamepadding_model 9 | from utils.net import freeze_params, make_norm 10 | from rcnn.modeling import registry 11 | from rcnn.core.config import cfg 12 | 13 | 14 | class MobileNetV3(mv3.MobileNetV3): 15 | def __init__(self, norm='bn', activation=mv3.H_Swish, stride=32): 16 | """ Constructor 17 | """ 18 | super(MobileNetV3, self).__init__() 19 | block = mv3.LinearBottleneck 20 | self.widen_factor = cfg.BACKBONE.MV3.WIDEN_FACTOR 21 | self.norm = norm 22 | self.se_reduce_mid = cfg.BACKBONE.MV3.SE_REDUCE_MID 23 | self.se_divisible = cfg.BACKBONE.MV3.SE_DIVISIBLE 24 | self.head_use_bias = cfg.BACKBONE.MV3.HEAD_USE_BIAS 25 | self.force_residual = cfg.BACKBONE.MV3.FORCE_RESIDUAL 26 | self.sync_se_act = cfg.BACKBONE.MV3.SYNC_SE_ACT 27 | self.bn_eps = cfg.BACKBONE.BN_EPS 28 | self.activation_type = activation 29 | self.stride = stride 30 | 31 | setting = cfg.BACKBONE.MV3.SETTING 32 | layers_cfg = mv3.MV3_CFG[setting] 33 | num_of_channels = [lc[-1][1] for lc in layers_cfg[1:-1]] 34 | self.channels = [make_divisible(ch * self.widen_factor, 8) for ch in num_of_channels] 35 | self.activation = activation() if layers_cfg[0][0][3] else nn.ReLU(inplace=True) 36 | self.layers = [len(lc) for lc in layers_cfg[2:-1]] 37 | 38 | self.inplanes = make_divisible(layers_cfg[0][0][1] * self.widen_factor, 8) 39 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=layers_cfg[0][0][0], stride=layers_cfg[0][0][4], 40 | 
padding=layers_cfg[0][0][0] // 2, bias=False) 41 | self.bn1 = make_norm(self.inplanes, norm=self.norm, eps=self.bn_eps) 42 | 43 | self.layer0 = self._make_layer(block, layers_cfg[1], dilation=1) if layers_cfg[1][0][0] else None 44 | self.layer1 = self._make_layer(block, layers_cfg[2], dilation=1) 45 | self.layer2 = self._make_layer(block, layers_cfg[3], dilation=1) 46 | self.layer3 = self._make_layer(block, layers_cfg[4], dilation=1) 47 | self.layer4 = self._make_layer(block, layers_cfg[5], dilation=1) 48 | 49 | self.spatial_scale = [1 / 4., 1 / 8., 1 / 16., 1 / 32.] 50 | self.dim_out = self.stage_out_dim[1:int(math.log(self.stride, 2))] 51 | 52 | del self.last_stage 53 | del self.avgpool 54 | del self.conv_out 55 | del self.fc 56 | self._init_weights() 57 | self._init_modules() 58 | 59 | def _init_modules(self): 60 | assert cfg.BACKBONE.MV3.FREEZE_AT in [0, 2, 3, 4, 5] # cfg.BACKBONE.MV3.FREEZE_AT: 2 61 | assert cfg.BACKBONE.MV3.FREEZE_AT <= len(self.layers) + 1 62 | if cfg.BACKBONE.MV3.FREEZE_AT > 0: 63 | freeze_params(getattr(self, 'conv1')) 64 | freeze_params(getattr(self, 'bn1')) 65 | for i in range(0, cfg.BACKBONE.MV3.FREEZE_AT): 66 | if i == 0: 67 | freeze_params(getattr(self, 'layer0')) if self.layer0 is not None else None 68 | else: 69 | freeze_params(getattr(self, 'layer%d' % i)) 70 | # Freeze all bn (affine) layers !!! 71 | self.apply(lambda m: freeze_params(m) if isinstance(m, ops.AffineChannel2d) else None) 72 | 73 | def train(self, mode=True): 74 | # Override train mode 75 | self.training = mode 76 | if cfg.BACKBONE.MV3.FREEZE_AT < 1: 77 | getattr(self, 'conv1').train(mode) 78 | getattr(self, 'bn1').train(mode) 79 | for i in range(cfg.BACKBONE.MV3.FREEZE_AT, len(self.layers) + 1): 80 | if i == 0: 81 | getattr(self, 'layer0').train(mode) if self.layer0 is not None else None 82 | else: 83 | getattr(self, 'layer%d' % i).train(mode) 84 | 85 | def forward(self, x): 86 | x = self.conv1(x) 87 | x = self.bn1(x) 88 | x = self.activation(x) 89 | 90 | if self.layer0 is not None: 91 | x = self.layer0(x) 92 | x2 = self.layer1(x) 93 | x3 = self.layer2(x2) 94 | x4 = self.layer3(x3) 95 | x5 = self.layer4(x4) 96 | 97 | return [x2, x3, x4, x5] 98 | 99 | 100 | # ---------------------------------------------------------------------------- # 101 | # MobileNet V3 Conv Body 102 | # ---------------------------------------------------------------------------- # 103 | @registry.BACKBONES.register("mobilenet_v3") 104 | def mobilenet_v3(): 105 | model = MobileNetV3() 106 | if cfg.BACKBONE.MV3.SAME_PAD: 107 | model = convert_conv2convsamepadding_model(model) 108 | return model 109 | -------------------------------------------------------------------------------- /rcnn/modeling/backbone/VoVNet.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | 5 | import models.imagenet.vovnet as vov 6 | import models.ops as ops 7 | from utils.net import freeze_params, make_norm 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | def get_norm(): 13 | norm = 'bn' 14 | if cfg.BACKBONE.VOV.USE_GN: 15 | norm = 'gn' 16 | return norm 17 | 18 | 19 | class VoVNet(vov.VoVNet): 20 | def __init__(self, norm='bn', stride=32): 21 | """ Constructor 22 | """ 23 | super(VoVNet, self).__init__() 24 | block = vov.OSABlock 25 | self.num_conv = cfg.BACKBONE.VOV.NUM_CONV # 5 26 | self.norm = norm 27 | self.stride = stride 28 | 29 | base_width = cfg.BACKBONE.VOV.WIDTH # 64 30 | stage_dims = cfg.BACKBONE.VOV.STAGE_DIMS 31 | 
concat_dims = cfg.BACKBONE.VOV.CONCAT_DIMS 32 | layers = cfg.BACKBONE.VOV.LAYERS 33 | self.layers = layers 34 | stage_with_conv = cfg.BACKBONE.VOV.STAGE_WITH_CONV 35 | self.channels = [base_width] + list(concat_dims) 36 | 37 | self.inplanes = base_width 38 | self.conv1 = nn.Conv2d(3, self.inplanes, 3, 2, 1, bias=False) 39 | self.bn1 = make_norm(self.inplanes, norm=self.norm) 40 | self.conv2 = nn.Conv2d(self.inplanes, self.inplanes, 3, 1, 1, bias=False) 41 | self.bn2 = make_norm(self.inplanes, norm=self.norm) 42 | self.conv3 = nn.Conv2d(self.inplanes, self.inplanes * 2, 3, 2, 1, bias=False) 43 | self.bn3 = make_norm(self.inplanes * 2, norm=self.norm) 44 | self.relu = nn.ReLU(inplace=True) 45 | self.inplanes = self.inplanes * 2 46 | 47 | self.layer1 = self._make_layer(block, stage_dims[0], concat_dims[0], layers[0], 1, conv=stage_with_conv[0]) 48 | self.layer2 = self._make_layer(block, stage_dims[1], concat_dims[1], layers[1], 2, conv=stage_with_conv[1]) 49 | self.layer3 = self._make_layer(block, stage_dims[2], concat_dims[2], layers[2], 2, conv=stage_with_conv[2]) 50 | self.layer4 = self._make_layer(block, stage_dims[3], concat_dims[3], layers[3], 2, conv=stage_with_conv[3]) 51 | 52 | self.spatial_scale = [1 / 4., 1 / 8., 1 / 16., 1 / 32.] 53 | self.dim_out = self.stage_out_dim[1:int(math.log(self.stride, 2))] 54 | 55 | del self.avgpool 56 | del self.fc 57 | self._init_weights() 58 | self._init_modules() 59 | 60 | def _init_modules(self): 61 | assert cfg.BACKBONE.VOV.FREEZE_AT in [0, 2, 3, 4, 5] # cfg.BACKBONE.VOV.FREEZE_AT: 2 62 | assert cfg.BACKBONE.VOV.FREEZE_AT <= len(self.layers) + 1 63 | if cfg.BACKBONE.VOV.FREEZE_AT > 0: 64 | freeze_params(getattr(self, 'conv1')) 65 | freeze_params(getattr(self, 'bn1')) 66 | freeze_params(getattr(self, 'conv2')) 67 | freeze_params(getattr(self, 'bn2')) 68 | freeze_params(getattr(self, 'conv3')) 69 | freeze_params(getattr(self, 'bn3')) 70 | for i in range(1, cfg.BACKBONE.VOV.FREEZE_AT): 71 | freeze_params(getattr(self, 'layer%d' % i)) 72 | # Freeze all bn (affine) layers !!! 
73 | self.apply(lambda m: freeze_params(m) if isinstance(m, ops.AffineChannel2d) else None) 74 | 75 | def train(self, mode=True): 76 | # Override train mode 77 | self.training = mode 78 | if cfg.BACKBONE.VOV.FREEZE_AT < 1: 79 | getattr(self, 'conv1').train(mode) 80 | getattr(self, 'bn1').train(mode) 81 | getattr(self, 'conv2').train(mode) 82 | getattr(self, 'bn2').train(mode) 83 | getattr(self, 'conv3').train(mode) 84 | getattr(self, 'bn3').train(mode) 85 | for i in range(cfg.BACKBONE.VOV.FREEZE_AT, len(self.layers) + 1): 86 | if i == 0: 87 | continue 88 | getattr(self, 'layer%d' % i).train(mode) 89 | 90 | def forward(self, x): 91 | x = self.conv1(x) 92 | x = self.bn1(x) 93 | x = self.relu(x) 94 | x = self.conv2(x) 95 | x = self.bn2(x) 96 | x = self.relu(x) 97 | x = self.conv3(x) 98 | x = self.bn3(x) 99 | x = self.relu(x) 100 | 101 | x2 = self.layer1(x) 102 | x3 = self.layer2(x2) 103 | x4 = self.layer3(x3) 104 | x5 = self.layer4(x4) 105 | 106 | return [x2, x3, x4, x5] 107 | 108 | # ---------------------------------------------------------------------------- # 109 | # VoVNet Conv Body 110 | # ---------------------------------------------------------------------------- # 111 | @registry.BACKBONES.register("vovnet") 112 | def vovnet(): 113 | model = VoVNet(norm=get_norm()) 114 | return model 115 | -------------------------------------------------------------------------------- /rcnn/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .HRNet import * 2 | from .MobileNet_v1 import * 3 | from .MobileNet_v2 import * 4 | from .MobileNet_v3 import * 5 | from .ResNet import * 6 | from .ResNeXt import * 7 | from .VoVNet import * 8 | -------------------------------------------------------------------------------- /rcnn/modeling/cascade_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/cascade_rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/modeling/cascade_rcnn/cascade_rcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from rcnn.modeling.cascade_rcnn import heads 5 | from rcnn.modeling.cascade_rcnn import outputs 6 | from rcnn.modeling.cascade_rcnn.inference import box_post_processor 7 | from rcnn.modeling.cascade_rcnn.loss import box_loss_evaluator 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | class CascadeRCNN(torch.nn.Module): 13 | """ 14 | Generic Box Head class. 
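    Builds cfg.CASCADE_RCNN.NUM_STAGE (Box_Head_i, Output_i) pairs and runs them sequentially:
    each training stage subsamples and refines the proposals produced by the previous stage and
    contributes a stage-weighted classification/box-regression loss; at test time the per-stage
    scores are averaged when TEST_ENSEMBLE is enabled.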
15 | """ 16 | def __init__(self, dim_in, spatial_scale): 17 | super(CascadeRCNN, self).__init__() 18 | self.num_stage = cfg.CASCADE_RCNN.NUM_STAGE 19 | self.test_stage = cfg.CASCADE_RCNN.TEST_STAGE 20 | self.stage_loss_weights = cfg.CASCADE_RCNN.STAGE_WEIGHTS 21 | self.test_ensemble = cfg.CASCADE_RCNN.TEST_ENSEMBLE 22 | 23 | head = registry.ROI_CASCADE_HEADS[cfg.CASCADE_RCNN.ROI_BOX_HEAD] 24 | output = registry.ROI_CASCADE_OUTPUTS[cfg.CASCADE_RCNN.ROI_BOX_OUTPUT] 25 | 26 | for stage in range(1, self.num_stage + 1): 27 | stage_name = '_{}'.format(stage) 28 | setattr(self, 'Box_Head' + stage_name, head(dim_in, spatial_scale)) 29 | setattr(self, 'Output' + stage_name, output(getattr(self, 'Box_Head' + stage_name).dim_out)) 30 | 31 | def forward(self, features, proposals, targets=None): 32 | if self.training: 33 | return self._forward_train(features, proposals, targets) 34 | else: 35 | return self._forward_test(features, proposals) 36 | 37 | def _forward_train(self, features, proposals, targets=None): 38 | all_loss = dict() 39 | for i in range(self.num_stage): 40 | head = getattr(self, 'Box_Head_{}'.format(i + 1)) 41 | output = getattr(self, 'Output_{}'.format(i + 1)) 42 | loss_evaluator = box_loss_evaluator(i) 43 | 44 | # Cascade R-CNN subsamples during training the proposals with a fixed 45 | # positive / negative ratio 46 | with torch.no_grad(): 47 | proposals = loss_evaluator.subsample(proposals, targets) 48 | 49 | # extract features that will be fed to the final classifier. The 50 | # feature_extractor generally corresponds to the pooler + heads 51 | x = head(features, proposals) 52 | # final classifier that converts the features into predictions 53 | class_logits, box_regression = output(x) 54 | 55 | loss_classifier, loss_box_reg = loss_evaluator([class_logits], [box_regression]) 56 | loss_scalar = self.stage_loss_weights[i] 57 | all_loss['s{}_cls_loss'.format(i + 1)] = loss_classifier * loss_scalar 58 | all_loss['s{}_bbox_loss'.format(i + 1)] = loss_box_reg * loss_scalar 59 | 60 | with torch.no_grad(): 61 | if i < self.num_stage - 1: 62 | post_processor_train = box_post_processor(i, is_train=True) 63 | proposals = post_processor_train((class_logits, box_regression), proposals, targets) 64 | 65 | return x, proposals, all_loss 66 | 67 | def _forward_test(self, features, proposals): 68 | ms_scores = [] 69 | for i in range(self.num_stage): 70 | head = getattr(self, 'Box_Head_{}'.format(i + 1)) 71 | output = getattr(self, 'Output_{}'.format(i + 1)) 72 | post_processor_test = box_post_processor(i, is_train=False) 73 | # extract features that will be fed to the final classifier. 
The 74 | # feature_extractor generally corresponds to the pooler + heads 75 | x = head(features, proposals) 76 | # final classifier that converts the features into predictions 77 | class_logits, box_regression = output(x) 78 | ms_scores.append(class_logits) 79 | 80 | if i < self.test_stage - 1: 81 | proposals = post_processor_test((class_logits, box_regression), proposals) 82 | else: 83 | if self.test_ensemble: 84 | assert len(ms_scores) == self.test_stage 85 | class_logits = sum(ms_scores) / self.test_stage 86 | result = post_processor_test((class_logits, box_regression), proposals) 87 | return x, result, {} 88 | -------------------------------------------------------------------------------- /rcnn/modeling/cascade_rcnn/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .convfc_heads import * 2 | from .mlp_heads import * 3 | 4 | -------------------------------------------------------------------------------- /rcnn/modeling/cascade_rcnn/heads/convfc_heads.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from models.imagenet.utils import convert_conv2convws_model 5 | from utils.net import make_conv, make_fc 6 | from rcnn.utils.poolers import Pooler 7 | from rcnn.modeling import registry 8 | from rcnn.core.config import cfg 9 | 10 | 11 | @registry.ROI_CASCADE_HEADS.register("roi_xconv1fc_head") 12 | class roi_xconv1fc_head(nn.Module): 13 | """Add a X conv + 1fc head""" 14 | 15 | def __init__(self, dim_in, spatial_scale): 16 | super().__init__() 17 | self.dim_in = dim_in[-1] 18 | 19 | method = cfg.FAST_RCNN.ROI_XFORM_METHOD 20 | resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION 21 | sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 22 | pooler = Pooler( 23 | method=method, 24 | output_size=resolution, 25 | scales=spatial_scale, 26 | sampling_ratio=sampling_ratio, 27 | ) 28 | self.pooler = pooler 29 | 30 | use_lite = cfg.FAST_RCNN.CONVFC_HEAD.USE_LITE 31 | use_bn = cfg.FAST_RCNN.CONVFC_HEAD.USE_BN 32 | use_gn = cfg.FAST_RCNN.CONVFC_HEAD.USE_GN 33 | conv_dim = cfg.FAST_RCNN.CONVFC_HEAD.CONV_DIM 34 | num_stacked_convs = cfg.FAST_RCNN.CONVFC_HEAD.NUM_STACKED_CONVS 35 | dilation = cfg.FAST_RCNN.CONVFC_HEAD.DILATION 36 | 37 | xconvs = [] 38 | for ix in range(num_stacked_convs): 39 | xconvs.append( 40 | make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite, 41 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite, use_relu=True) 42 | ) 43 | self.dim_in = conv_dim 44 | self.add_module("xconvs", nn.Sequential(*xconvs)) 45 | 46 | input_size = self.dim_in * resolution[0] * resolution[1] 47 | mlp_dim = cfg.FAST_RCNN.CONVFC_HEAD.MLP_DIM 48 | self.fc6 = make_fc(input_size, mlp_dim, use_bn=False, use_gn=False) 49 | self.dim_out = mlp_dim 50 | 51 | if cfg.FAST_RCNN.CONVFC_HEAD.USE_WS: 52 | self = convert_conv2convws_model(self) 53 | 54 | def forward(self, x, proposals): 55 | x = self.pooler(x, proposals) 56 | x = self.xconvs(x) 57 | x = x.view(x.size(0), -1) 58 | x = F.relu(self.fc6(x), inplace=True) 59 | 60 | return x 61 | -------------------------------------------------------------------------------- /rcnn/modeling/cascade_rcnn/heads/mlp_heads.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from models.imagenet.utils import convert_conv2convws_model 6 | from utils.net import make_fc 
7 | from rcnn.utils.poolers import Pooler 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | @registry.ROI_CASCADE_HEADS.register("roi_2mlp_head") 13 | class roi_2mlp_head(nn.Module): 14 | """Add a ReLU MLP with two hidden layers.""" 15 | 16 | def __init__(self, dim_in, spatial_scale): 17 | super().__init__() 18 | self.dim_in = dim_in[-1] 19 | 20 | method = cfg.FAST_RCNN.ROI_XFORM_METHOD 21 | resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION 22 | sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 23 | pooler = Pooler( 24 | method=method, 25 | output_size=resolution, 26 | scales=spatial_scale, 27 | sampling_ratio=sampling_ratio, 28 | ) 29 | input_size = self.dim_in * resolution[0] * resolution[1] 30 | mlp_dim = cfg.FAST_RCNN.MLP_HEAD.MLP_DIM 31 | use_bn = cfg.FAST_RCNN.MLP_HEAD.USE_BN 32 | use_gn = cfg.FAST_RCNN.MLP_HEAD.USE_GN 33 | self.pooler = pooler 34 | self.fc6 = make_fc(input_size, mlp_dim, use_bn, use_gn) 35 | self.fc7 = make_fc(mlp_dim, mlp_dim, use_bn, use_gn) 36 | self.dim_out = mlp_dim 37 | 38 | if cfg.FAST_RCNN.MLP_HEAD.USE_WS: 39 | self = convert_conv2convws_model(self) 40 | 41 | def forward(self, x, proposals): 42 | x = self.pooler(x, proposals) 43 | x = x.view(x.size(0), -1) 44 | 45 | x = F.relu(self.fc6(x), inplace=True) 46 | x = F.relu(self.fc7(x), inplace=True) 47 | 48 | return x 49 | -------------------------------------------------------------------------------- /rcnn/modeling/cascade_rcnn/outputs.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.init as init 3 | 4 | from rcnn.modeling import registry 5 | from rcnn.core.config import cfg 6 | 7 | 8 | # ---------------------------------------------------------------------------- # 9 | # R-CNN bbox branch outputs 10 | # ---------------------------------------------------------------------------- # 11 | @registry.ROI_CASCADE_OUTPUTS.register("box_output") 12 | class Box_output(nn.Module): 13 | def __init__(self, dim_in): 14 | super().__init__() 15 | self.dim_in = dim_in 16 | 17 | self.cls_score = nn.Linear(self.dim_in, cfg.MODEL.NUM_CLASSES) 18 | # self.avgpool = nn.AdaptiveAvgPool2d(1) 19 | if cfg.FAST_RCNN.CLS_AGNOSTIC_BBOX_REG: # bg and fg 20 | self.bbox_pred = nn.Linear(self.dim_in, 4 * 2) 21 | else: 22 | raise NotImplementedError 23 | # self.bbox_pred = nn.Linear(self.dim_in, 4 * cfg.MODEL.NUM_CLASSES) 24 | 25 | self._init_weights() 26 | 27 | def _init_weights(self): 28 | init.normal_(self.cls_score.weight, std=0.01) 29 | init.constant_(self.cls_score.bias, 0) 30 | init.normal_(self.bbox_pred.weight, std=0.001) 31 | init.constant_(self.bbox_pred.bias, 0) 32 | 33 | def forward(self, x): 34 | if x.ndimension() == 4: 35 | x = nn.functional.adaptive_avg_pool2d(x, 1) 36 | # x = self.avgpool(x) 37 | x = x.view(x.size(0), -1) 38 | cls_score = self.cls_score(x) 39 | bbox_pred = self.bbox_pred(x) 40 | 41 | return cls_score, bbox_pred 42 | -------------------------------------------------------------------------------- /rcnn/modeling/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/fast_rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/modeling/fast_rcnn/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 
| 4 | from rcnn.modeling.fast_rcnn import heads 5 | from rcnn.modeling.fast_rcnn import outputs 6 | from rcnn.modeling.fast_rcnn.inference import box_post_processor 7 | from rcnn.modeling.fast_rcnn.loss import box_loss_evaluator 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | class FastRCNN(torch.nn.Module): 13 | """ 14 | Generic Box Head class. 15 | """ 16 | 17 | def __init__(self, dim_in, spatial_scale): 18 | super(FastRCNN, self).__init__() 19 | head = registry.ROI_BOX_HEADS[cfg.FAST_RCNN.ROI_BOX_HEAD] 20 | self.Head = head(dim_in, spatial_scale) 21 | output = registry.ROI_BOX_OUTPUTS[cfg.FAST_RCNN.ROI_BOX_OUTPUT] 22 | self.Output = output(self.Head.dim_out) 23 | 24 | self.post_processor = box_post_processor() 25 | self.loss_evaluator = box_loss_evaluator() 26 | 27 | def forward(self, features, proposals, targets=None): 28 | """ 29 | Arguments: 30 | features (list[Tensor]): feature-maps from possibly several levels 31 | proposals (list[BoxList]): proposal boxes 32 | targets (list[BoxList], optional): the ground-truth targets. 33 | 34 | Returns: 35 | x (Tensor): the result of the feature extractor 36 | proposals (list[BoxList]): during training, the subsampled proposals 37 | are returned. During testing, the predicted boxlists are returned 38 | losses (dict[Tensor]): During training, returns the losses for the 39 | head. During testing, returns an empty dict. 40 | """ 41 | if self.training: 42 | return self._forward_train(features, proposals, targets) 43 | else: 44 | return self._forward_test(features, proposals) 45 | 46 | def _forward_train(self, features, proposals, targets=None): 47 | # Faster R-CNN subsamples during training the proposals with a fixed 48 | # positive / negative ratio 49 | with torch.no_grad(): 50 | proposals = self.loss_evaluator.subsample(proposals, targets) 51 | 52 | # extract features that will be fed to the final classifier. The 53 | # feature_extractor generally corresponds to the pooler + heads 54 | x = self.Head(features, proposals) 55 | # final classifier that converts the features into predictions 56 | class_logits, box_regression = self.Output(x) 57 | 58 | losses = self.loss_evaluator([class_logits], [box_regression]) 59 | return x, proposals, losses 60 | 61 | def _forward_test(self, features, proposals): 62 | # extract features that will be fed to the final classifier. 
The 63 | # feature_extractor generally corresponds to the pooler + heads 64 | x = self.Head(features, proposals) 65 | # final classifier that converts the features into predictions 66 | class_logits, box_regression = self.Output(x) 67 | 68 | result = self.post_processor((class_logits, box_regression), proposals) 69 | return x, result, {} 70 | -------------------------------------------------------------------------------- /rcnn/modeling/fast_rcnn/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .mlp_heads import * 2 | from .convfc_heads import * 3 | -------------------------------------------------------------------------------- /rcnn/modeling/fast_rcnn/heads/convfc_heads.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from models.imagenet.utils import convert_conv2convws_model 5 | from utils.net import make_conv, make_fc 6 | from rcnn.utils.poolers import Pooler 7 | from rcnn.modeling import registry 8 | from rcnn.core.config import cfg 9 | 10 | 11 | @registry.ROI_BOX_HEADS.register("roi_xconv1fc_head") 12 | class roi_xconv1fc_head(nn.Module): 13 | """Add a X conv + 1fc head""" 14 | 15 | def __init__(self, dim_in, spatial_scale): 16 | super().__init__() 17 | self.dim_in = dim_in[-1] 18 | 19 | method = cfg.FAST_RCNN.ROI_XFORM_METHOD 20 | resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION 21 | sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 22 | pooler = Pooler( 23 | method=method, 24 | output_size=resolution, 25 | scales=spatial_scale, 26 | sampling_ratio=sampling_ratio, 27 | ) 28 | self.pooler = pooler 29 | 30 | use_lite = cfg.FAST_RCNN.CONVFC_HEAD.USE_LITE 31 | use_bn = cfg.FAST_RCNN.CONVFC_HEAD.USE_BN 32 | use_gn = cfg.FAST_RCNN.CONVFC_HEAD.USE_GN 33 | conv_dim = cfg.FAST_RCNN.CONVFC_HEAD.CONV_DIM 34 | num_stacked_convs = cfg.FAST_RCNN.CONVFC_HEAD.NUM_STACKED_CONVS 35 | dilation = cfg.FAST_RCNN.CONVFC_HEAD.DILATION 36 | 37 | xconvs = [] 38 | for ix in range(num_stacked_convs): 39 | xconvs.append( 40 | make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite, 41 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite, use_relu=True) 42 | ) 43 | self.dim_in = conv_dim 44 | self.add_module("xconvs", nn.Sequential(*xconvs)) 45 | 46 | input_size = self.dim_in * resolution[0] * resolution[1] 47 | mlp_dim = cfg.FAST_RCNN.CONVFC_HEAD.MLP_DIM 48 | self.fc6 = make_fc(input_size, mlp_dim, use_bn=False, use_gn=False) 49 | self.dim_out = mlp_dim 50 | 51 | if cfg.FAST_RCNN.CONVFC_HEAD.USE_WS: 52 | self = convert_conv2convws_model(self) 53 | 54 | def forward(self, x, proposals): 55 | x = self.pooler(x, proposals) 56 | x = self.xconvs(x) 57 | x = x.view(x.size(0), -1) 58 | x = F.relu(self.fc6(x), inplace=True) 59 | 60 | return x 61 | -------------------------------------------------------------------------------- /rcnn/modeling/fast_rcnn/heads/mlp_heads.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from models.imagenet.utils import convert_conv2convws_model 6 | from utils.net import make_fc 7 | from rcnn.utils.poolers import Pooler 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | @registry.ROI_BOX_HEADS.register("roi_2mlp_head") 13 | class roi_2mlp_head(nn.Module): 14 | """Add a ReLU MLP with two hidden layers.""" 15 | 16 | def 
__init__(self, dim_in, spatial_scale): 17 | super().__init__() 18 | self.dim_in = dim_in[-1] 19 | 20 | method = cfg.FAST_RCNN.ROI_XFORM_METHOD 21 | resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION 22 | sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 23 | pooler = Pooler( 24 | method=method, 25 | output_size=resolution, 26 | scales=spatial_scale, 27 | sampling_ratio=sampling_ratio, 28 | ) 29 | input_size = self.dim_in * resolution[0] * resolution[1] 30 | mlp_dim = cfg.FAST_RCNN.MLP_HEAD.MLP_DIM 31 | use_bn = cfg.FAST_RCNN.MLP_HEAD.USE_BN 32 | use_gn = cfg.FAST_RCNN.MLP_HEAD.USE_GN 33 | self.pooler = pooler 34 | self.fc6 = make_fc(input_size, mlp_dim, use_bn, use_gn) 35 | self.fc7 = make_fc(mlp_dim, mlp_dim, use_bn, use_gn) 36 | self.dim_out = mlp_dim 37 | 38 | if cfg.FAST_RCNN.MLP_HEAD.USE_WS: 39 | self = convert_conv2convws_model(self) 40 | 41 | def forward(self, x, proposals): 42 | x = self.pooler(x, proposals) 43 | x = x.view(x.size(0), -1) 44 | 45 | x = F.relu(self.fc6(x), inplace=True) 46 | x = F.relu(self.fc7(x), inplace=True) 47 | 48 | return x 49 | -------------------------------------------------------------------------------- /rcnn/modeling/fast_rcnn/outputs.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.init as init 3 | 4 | from rcnn.modeling import registry 5 | from rcnn.core.config import cfg 6 | 7 | 8 | # ---------------------------------------------------------------------------- # 9 | # R-CNN bbox branch outputs 10 | # ---------------------------------------------------------------------------- # 11 | @registry.ROI_BOX_OUTPUTS.register("box_output") 12 | class Box_output(nn.Module): 13 | def __init__(self, dim_in): 14 | super().__init__() 15 | self.dim_in = dim_in 16 | self.cls_on = cfg.FAST_RCNN.CLS_ON 17 | self.reg_on = cfg.FAST_RCNN.REG_ON 18 | 19 | if self.cls_on: 20 | self.cls_score = nn.Linear(self.dim_in, cfg.MODEL.NUM_CLASSES) 21 | init.normal_(self.cls_score.weight, std=0.01) 22 | init.constant_(self.cls_score.bias, 0) 23 | # self.avgpool = nn.AdaptiveAvgPool2d(1) 24 | if self.reg_on: 25 | if cfg.FAST_RCNN.CLS_AGNOSTIC_BBOX_REG: # bg and fg 26 | self.bbox_pred = nn.Linear(self.dim_in, 4 * 2) 27 | else: 28 | self.bbox_pred = nn.Linear(self.dim_in, 4 * cfg.MODEL.NUM_CLASSES) 29 | init.normal_(self.bbox_pred.weight, std=0.001) 30 | init.constant_(self.bbox_pred.bias, 0) 31 | 32 | def forward(self, x): 33 | if x.ndimension() == 4: 34 | x = nn.functional.adaptive_avg_pool2d(x, 1) 35 | # x = self.avgpool(x) 36 | x = x.view(x.size(0), -1) 37 | cls_score = self.cls_score(x) if self.cls_on else None 38 | bbox_pred = self.bbox_pred(x) if self.reg_on else None 39 | 40 | return cls_score, bbox_pred 41 | -------------------------------------------------------------------------------- /rcnn/modeling/fpn/HRFPN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from utils.net import make_conv 6 | from rcnn.core.config import cfg 7 | from rcnn.modeling import registry 8 | 9 | 10 | # ---------------------------------------------------------------------------- # 11 | # Functions for bolting HRFPN onto a backbone architectures 12 | # ---------------------------------------------------------------------------- # 13 | @registry.FPN_BODY.register("hrfpn") 14 | class hrfpn(nn.Module): 15 | # dim_in = [w, w * 2, w * 4, w * 8] 16 | # spatial_scale = [1/4, 1/8, 1/16, 1/32] 17 | def 
__init__(self, dim_in, spatial_scale): 18 | super().__init__() 19 | self.dim_in = sum(dim_in) 20 | self.spatial_scale = spatial_scale 21 | 22 | hrfpn_dim = cfg.FPN.HRFPN.DIM # 256 23 | use_lite = cfg.FPN.HRFPN.USE_LITE 24 | use_bn = cfg.FPN.HRFPN.USE_BN 25 | use_gn = cfg.FPN.HRFPN.USE_GN 26 | if cfg.FPN.HRFPN.POOLING_TYPE == 'AVG': 27 | self.pooling = F.avg_pool2d 28 | else: 29 | self.pooling = F.max_pool2d 30 | self.num_extra_pooling = cfg.FPN.HRFPN.NUM_EXTRA_POOLING # 1 31 | self.num_output = len(dim_in) + self.num_extra_pooling # 5 32 | 33 | self.reduction_conv = make_conv(self.dim_in, hrfpn_dim, kernel=1, use_bn=use_bn, use_gn=use_gn) 34 | self.dim_in = hrfpn_dim 35 | 36 | self.fpn_conv = nn.ModuleList() 37 | for i in range(self.num_output): 38 | self.fpn_conv.append( 39 | make_conv(self.dim_in, hrfpn_dim, kernel=3, use_dwconv=use_lite, use_bn=use_bn, use_gn=use_gn, 40 | suffix_1x1=use_lite) 41 | ) 42 | self.dim_in = hrfpn_dim 43 | 44 | if self.num_extra_pooling: 45 | self.spatial_scale.append(self.spatial_scale[-1] * 0.5) 46 | self.dim_out = [self.dim_in for _ in range(self.num_output)] 47 | self._init_weights() 48 | 49 | def _init_weights(self): 50 | # weight initialization 51 | for m in self.modules(): 52 | if isinstance(m, nn.Conv2d): 53 | nn.init.kaiming_uniform_(m.weight, a=1) 54 | if m.bias is not None: 55 | nn.init.zeros_(m.bias) 56 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 57 | nn.init.constant_(m.weight, 1) 58 | nn.init.constant_(m.bias, 0) 59 | 60 | def forward(self, x): 61 | outs = [x[0]] 62 | for i in range(1, len(x)): 63 | outs.append(F.interpolate(x[i], scale_factor=2**i, mode='bilinear')) 64 | out = torch.cat(outs, dim=1) 65 | out = self.reduction_conv(out) 66 | 67 | outs = [out] 68 | for i in range(1, self.num_output): 69 | outs.append(self.pooling(out, kernel_size=2**i, stride=2**i)) 70 | fpn_output_blobs = [] 71 | for i in range(self.num_output): 72 | fpn_output_blobs.append(self.fpn_conv[i](outs[i])) 73 | 74 | # use all levels 75 | return fpn_output_blobs # [P2 - P6] 76 | -------------------------------------------------------------------------------- /rcnn/modeling/fpn/__init__.py: -------------------------------------------------------------------------------- 1 | from .FPN import * 2 | from .HRFPN import * 3 | -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/keypoint_rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .convx_heads import * 2 | from .gce_heads import * 3 | -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/heads/convx_heads.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from utils.net import make_conv 5 | from rcnn.utils.poolers import Pooler 6 | from rcnn.modeling import registry 7 | from rcnn.core.config import cfg 8 | 9 | 10 | @registry.ROI_KEYPOINT_HEADS.register("roi_convx_head") 11 | class roi_convx_head(nn.Module): 12 | def __init__(self, dim_in, spatial_scale): 13 | super(roi_convx_head, self).__init__() 14 | 
self.dim_in = dim_in[-1] 15 | 16 | method = cfg.KRCNN.ROI_XFORM_METHOD 17 | resolution = cfg.KRCNN.ROI_XFORM_RESOLUTION 18 | sampling_ratio = cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO 19 | pooler = Pooler( 20 | method=method, 21 | output_size=resolution, 22 | scales=spatial_scale, 23 | sampling_ratio=sampling_ratio, 24 | ) 25 | self.pooler = pooler 26 | 27 | use_lite = cfg.KRCNN.CONVX_HEAD.USE_LITE 28 | use_bn = cfg.KRCNN.CONVX_HEAD.USE_BN 29 | use_gn = cfg.KRCNN.CONVX_HEAD.USE_GN 30 | conv_dim = cfg.KRCNN.CONVX_HEAD.CONV_DIM 31 | num_stacked_convs = cfg.KRCNN.CONVX_HEAD.NUM_STACKED_CONVS 32 | dilation = cfg.KRCNN.CONVX_HEAD.DILATION 33 | 34 | self.blocks = [] 35 | for layer_idx in range(num_stacked_convs): 36 | layer_name = "keypoint_fcn{}".format(layer_idx + 1) 37 | module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite, 38 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite) 39 | self.add_module(layer_name, module) 40 | self.dim_in = conv_dim 41 | self.blocks.append(layer_name) 42 | self.dim_out = self.dim_in 43 | 44 | def forward(self, x, proposals): 45 | x = self.pooler(x, proposals) 46 | 47 | for layer_name in self.blocks: 48 | x = F.relu(getattr(self, layer_name)(x)) 49 | 50 | return x 51 | -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/heads/gce_heads.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from models.ops import interpolate, NonLocal2d 5 | from rcnn.core.config import cfg 6 | from rcnn.modeling import registry 7 | from rcnn.utils.poolers import Pooler 8 | from utils.net import make_conv 9 | 10 | 11 | @registry.ROI_KEYPOINT_HEADS.register("roi_gce_head") 12 | class roi_gce_head(nn.Module): 13 | def __init__(self, dim_in, spatial_scale): 14 | super(roi_gce_head, self).__init__() 15 | self.dim_in = dim_in[-1] 16 | 17 | method = cfg.KRCNN.ROI_XFORM_METHOD 18 | resolution = cfg.KRCNN.ROI_XFORM_RESOLUTION 19 | sampling_ratio = cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO 20 | pooler = Pooler( 21 | method=method, 22 | output_size=resolution, 23 | scales=spatial_scale, 24 | sampling_ratio=sampling_ratio, 25 | ) 26 | self.pooler = pooler 27 | 28 | use_nl = cfg.KRCNN.GCE_HEAD.USE_NL 29 | use_bn = cfg.KRCNN.GCE_HEAD.USE_BN 30 | use_gn = cfg.KRCNN.GCE_HEAD.USE_GN 31 | conv_dim = cfg.KRCNN.GCE_HEAD.CONV_DIM 32 | asppv3_dim = cfg.KRCNN.GCE_HEAD.ASPPV3_DIM 33 | num_convs_before_asppv3 = cfg.KRCNN.GCE_HEAD.NUM_CONVS_BEFORE_ASPPV3 34 | asppv3_dilation = cfg.KRCNN.GCE_HEAD.ASPPV3_DILATION 35 | num_convs_after_asppv3 = cfg.KRCNN.GCE_HEAD.NUM_CONVS_AFTER_ASPPV3 36 | 37 | # convx before asppv3 module 38 | before_asppv3_list = [] 39 | for _ in range(num_convs_before_asppv3): 40 | before_asppv3_list.append( 41 | make_conv(self.dim_in, conv_dim, kernel=3, stride=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 42 | ) 43 | self.dim_in = conv_dim 44 | self.conv_before_asppv3 = nn.Sequential(*before_asppv3_list) if len(before_asppv3_list) else None 45 | 46 | # asppv3 module 47 | self.asppv3 = [] 48 | self.asppv3.append( 49 | make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 50 | ) 51 | for dilation in asppv3_dilation: 52 | self.asppv3.append( 53 | make_conv(self.dim_in, asppv3_dim, kernel=3, dilation=dilation, use_bn=use_bn, use_gn=use_gn, 54 | use_relu=True) 55 | ) 56 | self.asppv3 = nn.ModuleList(self.asppv3) 57 | self.im_pool = nn.Sequential( 58 | nn.AdaptiveAvgPool2d(1), 59 | 
make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 60 | ) 61 | self.dim_in = (len(asppv3_dilation) + 2) * asppv3_dim 62 | 63 | feat_list = [] 64 | feat_list.append( 65 | make_conv(self.dim_in, conv_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 66 | ) 67 | if use_nl: 68 | feat_list.append( 69 | NonLocal2d(conv_dim, int(conv_dim * cfg.KRCNN.GCE_HEAD.NL_RATIO), conv_dim, use_gn=True) 70 | ) 71 | self.feat = nn.Sequential(*feat_list) 72 | self.dim_in = conv_dim 73 | 74 | # convx after asppv3 module 75 | assert num_convs_after_asppv3 >= 1 76 | after_asppv3_list = [] 77 | for _ in range(num_convs_after_asppv3): 78 | after_asppv3_list.append( 79 | make_conv(self.dim_in, conv_dim, kernel=3, use_bn=use_bn, use_gn=use_gn, use_relu=True) 80 | ) 81 | self.dim_in = conv_dim 82 | self.conv_after_asppv3 = nn.Sequential(*after_asppv3_list) if len(after_asppv3_list) else None 83 | self.dim_out = self.dim_in 84 | 85 | def forward(self, x, proposals): 86 | resolution = cfg.KRCNN.ROI_XFORM_RESOLUTION 87 | x = self.pooler(x, proposals) 88 | 89 | if self.conv_before_asppv3 is not None: 90 | x = self.conv_before_asppv3(x) 91 | 92 | asppv3_out = [interpolate(self.im_pool(x), scale_factor=resolution, 93 | mode="bilinear", align_corners=False)] 94 | for i in range(len(self.asppv3)): 95 | asppv3_out.append(self.asppv3[i](x)) 96 | asppv3_out = torch.cat(asppv3_out, 1) 97 | asppv3_out = self.feat(asppv3_out) 98 | 99 | if self.conv_after_asppv3 is not None: 100 | x = self.conv_after_asppv3(asppv3_out) 101 | return x 102 | -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/inference.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | import torch 5 | from torch import nn 6 | 7 | from utils.data.structures.bounding_box import BoxList 8 | from utils.data.structures.keypoint import PersonKeypoints 9 | 10 | 11 | class KeypointPostProcessor(nn.Module): 12 | def __init__(self): 13 | super(KeypointPostProcessor, self).__init__() 14 | 15 | def forward(self, x, boxes): 16 | boxes_per_image = [len(box) for box in boxes] 17 | kpt_prob = x.split(boxes_per_image, dim=0) 18 | 19 | results = [] 20 | for prob, box in zip(kpt_prob, boxes): 21 | bbox = BoxList(box.bbox, box.size, mode="xyxy") 22 | for field in box.fields(): 23 | bbox.add_field(field, box.get_field(field)) 24 | bbox.add_field("keypoints", prob.cpu().numpy()) 25 | results.append(bbox) 26 | 27 | return results 28 | 29 | 30 | def keypoint_results(maps, rois): 31 | """Extract predicted keypoint locations from heatmaps. Output has shape 32 | (#rois, 4, #keypoints) with the 4 rows corresponding to (x, y, logit, prob) 33 | for each keypoint. 34 | """ 35 | # This function converts a discrete image coordinate in a HEATMAP_SIZE x 36 | # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain 37 | # consistency with keypoints_to_heatmap_labels by using the conversion from 38 | # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a 39 | # continuous coordinate. 
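# A minimal worked illustration of that c = d + 0.5 convention as it is applied below
# (values are illustrative only): if the per-keypoint argmax lands at discrete column
# x_int = 12 of the resized ROI map, the continuous image-space coordinate becomes
# x = (12 + 0.5) * width_correction + offset_x[i], i.e. the keypoint is placed at the
# center of that heatmap cell rather than at its corner.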
40 | rois = rois.bbox.numpy() 41 | 42 | offset_x = rois[:, 0] 43 | offset_y = rois[:, 1] 44 | 45 | widths = rois[:, 2] - rois[:, 0] 46 | heights = rois[:, 3] - rois[:, 1] 47 | widths = np.maximum(widths, 1) 48 | heights = np.maximum(heights, 1) 49 | widths_ceil = np.ceil(widths) 50 | heights_ceil = np.ceil(heights) 51 | 52 | # NCHW to NHWC for use with OpenCV 53 | maps = np.transpose(maps, [0, 2, 3, 1]) 54 | min_size = 0 # cfg.KRCNN.INFERENCE_MIN_SIZE 55 | num_keypoints = maps.shape[3] 56 | xy_preds = np.zeros((len(rois), 3, num_keypoints), dtype=np.float32) 57 | end_scores = np.zeros((len(rois), num_keypoints), dtype=np.float32) 58 | for i in range(len(rois)): 59 | if min_size > 0: 60 | roi_map_width = int(np.maximum(widths_ceil[i], min_size)) 61 | roi_map_height = int(np.maximum(heights_ceil[i], min_size)) 62 | else: 63 | roi_map_width = widths_ceil[i] 64 | roi_map_height = heights_ceil[i] 65 | width_correction = widths[i] / roi_map_width 66 | height_correction = heights[i] / roi_map_height 67 | roi_map = cv2.resize(maps[i], (roi_map_width, roi_map_height), interpolation=cv2.INTER_CUBIC) 68 | # Bring back to CHW 69 | roi_map = np.transpose(roi_map, [2, 0, 1]) 70 | # roi_map_probs = scores_to_probs(roi_map.copy()) 71 | w = roi_map.shape[2] 72 | pos = roi_map.reshape(num_keypoints, -1).argmax(axis=1) 73 | x_int = pos % w 74 | y_int = (pos - x_int) // w 75 | # assert (roi_map_probs[k, y_int, x_int] == roi_map_probs[k, :, :].max()) 76 | x = (x_int + 0.5) * width_correction 77 | y = (y_int + 0.5) * height_correction 78 | xy_preds[i, 0, :] = x + offset_x[i] 79 | xy_preds[i, 1, :] = y + offset_y[i] 80 | xy_preds[i, 2, :] = 1 81 | end_scores[i, :] = roi_map[np.arange(num_keypoints), y_int, x_int] 82 | 83 | xy = np.concatenate((xy_preds[:, :2, :], end_scores[:, np.newaxis, :]), axis=1) 84 | 85 | return np.transpose(xy_preds, [0, 2, 1]), xy 86 | 87 | 88 | def keypoint_post_processor(): 89 | keypoint_post_processor = KeypointPostProcessor() 90 | return keypoint_post_processor 91 | -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/keypoint_rcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from rcnn.modeling.keypoint_rcnn import heads 4 | from rcnn.modeling.keypoint_rcnn import outputs 5 | from rcnn.modeling.keypoint_rcnn.inference import keypoint_post_processor 6 | from rcnn.modeling.keypoint_rcnn.loss import keypoint_loss_evaluator 7 | from rcnn.modeling import registry 8 | from rcnn.core.config import cfg 9 | 10 | 11 | class KeypointRCNN(torch.nn.Module): 12 | def __init__(self, dim_in, spatial_scale): 13 | super(KeypointRCNN, self).__init__() 14 | if len(cfg.KRCNN.ROI_STRIDES) == 0: 15 | self.spatial_scale = spatial_scale 16 | else: 17 | self.spatial_scale = [1. 
/ stride for stride in cfg.KRCNN.ROI_STRIDES] 18 | 19 | head = registry.ROI_KEYPOINT_HEADS[cfg.KRCNN.ROI_KEYPOINT_HEAD] 20 | self.Head = head(dim_in, self.spatial_scale) 21 | output = registry.ROI_KEYPOINT_OUTPUTS[cfg.KRCNN.ROI_KEYPOINT_OUTPUT] 22 | self.Output = output(self.Head.dim_out) 23 | 24 | self.post_processor = keypoint_post_processor() 25 | self.loss_evaluator = keypoint_loss_evaluator() 26 | 27 | def forward(self, conv_features, proposals, targets=None): 28 | if self.training: 29 | return self._forward_train(conv_features, proposals, targets) 30 | else: 31 | return self._forward_test(conv_features, proposals) 32 | 33 | def _forward_train(self, conv_features, proposals, targets=None): 34 | all_proposals = proposals 35 | with torch.no_grad(): 36 | proposals = self.loss_evaluator.resample(proposals, targets) 37 | 38 | x = self.Head(conv_features, proposals) 39 | kp_logits = self.Output(x) 40 | 41 | loss_kp = self.loss_evaluator(kp_logits) 42 | 43 | return x, all_proposals, dict(loss_kp=loss_kp) 44 | 45 | def _forward_test(self, conv_features, proposals): 46 | x = self.Head(conv_features, proposals) 47 | kp_logits = self.Output(x) 48 | 49 | result = self.post_processor(kp_logits, proposals) 50 | return x, result, {} 51 | -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/outputs.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from rcnn.modeling import registry 5 | from rcnn.core.config import cfg 6 | 7 | 8 | @registry.ROI_KEYPOINT_OUTPUTS.register("keypoint_output") 9 | class Keypoint_output(nn.Module): 10 | def __init__(self, dim_in): 11 | super(Keypoint_output, self).__init__() 12 | num_keypoints = cfg.KRCNN.NUM_CLASSES 13 | assert cfg.KRCNN.RESOLUTION[0] // cfg.KRCNN.ROI_XFORM_RESOLUTION[0] == \ 14 | cfg.KRCNN.RESOLUTION[1] // cfg.KRCNN.ROI_XFORM_RESOLUTION[1] 15 | self.up_scale = cfg.KRCNN.RESOLUTION[0] // (cfg.KRCNN.ROI_XFORM_RESOLUTION[0] * 2) 16 | 17 | deconv_kernel = 4 18 | self.kps_score_lowres = nn.ConvTranspose2d( 19 | dim_in, 20 | num_keypoints, 21 | deconv_kernel, 22 | stride=2, 23 | padding=deconv_kernel // 2 - 1, 24 | ) 25 | 26 | nn.init.kaiming_normal_(self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu") 27 | nn.init.constant_(self.kps_score_lowres.bias, 0) 28 | 29 | self.dim_out = num_keypoints 30 | 31 | def forward(self, x): 32 | x = self.kps_score_lowres(x) 33 | if self.up_scale > 1: 34 | x = F.interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False) 35 | 36 | return x 37 | -------------------------------------------------------------------------------- /rcnn/modeling/mask_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/mask_rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/modeling/mask_rcnn/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .convx_heads import * 2 | -------------------------------------------------------------------------------- /rcnn/modeling/mask_rcnn/heads/convx_heads.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from models.imagenet.utils import 
convert_conv2convws_model 5 | from utils.net import make_conv 6 | from rcnn.utils.poolers import Pooler 7 | from rcnn.modeling import registry 8 | from rcnn.core.config import cfg 9 | 10 | 11 | @registry.ROI_MASK_HEADS.register("roi_convx_head") 12 | class roi_convx_head(nn.Module): 13 | """ 14 | Heads for FPN for classification 15 | """ 16 | 17 | def __init__(self, dim_in, spatial_scale): 18 | """ 19 | Arguments: 20 | num_classes (int): number of output classes 21 | input_size (int): number of channels of the input once it's flattened 22 | representation_size (int): size of the intermediate representation 23 | """ 24 | super(roi_convx_head, self).__init__() 25 | self.dim_in = dim_in[-1] 26 | 27 | method = cfg.MRCNN.ROI_XFORM_METHOD 28 | resolution = cfg.MRCNN.ROI_XFORM_RESOLUTION 29 | sampling_ratio = cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO 30 | pooler = Pooler( 31 | method=method, 32 | output_size=resolution, 33 | scales=spatial_scale, 34 | sampling_ratio=sampling_ratio, 35 | ) 36 | self.pooler = pooler 37 | 38 | use_lite = cfg.MRCNN.CONVX_HEAD.USE_LITE 39 | use_bn = cfg.MRCNN.CONVX_HEAD.USE_BN 40 | use_gn = cfg.MRCNN.CONVX_HEAD.USE_GN 41 | conv_dim = cfg.MRCNN.CONVX_HEAD.CONV_DIM 42 | num_stacked_convs = cfg.MRCNN.CONVX_HEAD.NUM_STACKED_CONVS 43 | dilation = cfg.MRCNN.CONVX_HEAD.DILATION 44 | 45 | self.blocks = [] 46 | for layer_idx in range(num_stacked_convs): 47 | layer_name = "mask_fcn{}".format(layer_idx + 1) 48 | module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite, 49 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite) 50 | self.add_module(layer_name, module) 51 | self.dim_in = conv_dim 52 | self.blocks.append(layer_name) 53 | self.dim_out = self.dim_in 54 | 55 | if cfg.MRCNN.CONVX_HEAD.USE_WS: 56 | self = convert_conv2convws_model(self) 57 | 58 | for m in self.modules(): 59 | if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)): 60 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity="relu") 61 | if m.bias is not None: 62 | nn.init.zeros_(m.bias) 63 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 64 | nn.init.constant_(m.weight, 1) 65 | nn.init.constant_(m.bias, 0) 66 | 67 | def forward(self, x, proposals): 68 | x = self.pooler(x, proposals) 69 | roi_feature = x 70 | for layer_name in self.blocks: 71 | x = F.relu(getattr(self, layer_name)(x)) 72 | 73 | return x, roi_feature 74 | 75 | -------------------------------------------------------------------------------- /rcnn/modeling/mask_rcnn/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from utils.data.structures.boxlist_ops import cat_boxlist 4 | from rcnn.modeling.mask_rcnn import heads 5 | from rcnn.modeling.mask_rcnn import outputs 6 | from rcnn.modeling.mask_rcnn.inference import mask_post_processor 7 | from rcnn.modeling.mask_rcnn.loss import mask_loss_evaluator 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | class MaskRCNN(torch.nn.Module): 13 | def __init__(self, dim_in, spatial_scale): 14 | super(MaskRCNN, self).__init__() 15 | if len(cfg.MRCNN.ROI_STRIDES) == 0: 16 | self.spatial_scale = spatial_scale 17 | else: 18 | self.spatial_scale = [1. 
/ stride for stride in cfg.MRCNN.ROI_STRIDES] 19 | 20 | head = registry.ROI_MASK_HEADS[cfg.MRCNN.ROI_MASK_HEAD] 21 | self.Head = head(dim_in, self.spatial_scale) 22 | output = registry.ROI_MASK_OUTPUTS[cfg.MRCNN.ROI_MASK_OUTPUT] 23 | self.Output = output(self.Head.dim_out) 24 | 25 | self.post_processor = mask_post_processor() 26 | self.loss_evaluator = mask_loss_evaluator() 27 | 28 | def forward(self, conv_features, proposals, targets=None): 29 | """ 30 | Arguments: 31 | conv_features (list[Tensor]): feature-maps from possibly several levels 32 | proposals (list[BoxList]): proposal boxes 33 | targets (list[BoxList], optional): the ground-truth targets. 34 | 35 | Returns: 36 | x (Tensor): the result of the feature extractor 37 | all_proposals (list[BoxList]): during training, the original proposals 38 | are returned. During testing, the predicted boxlists are returned 39 | with the `mask` field set 40 | losses (dict[Tensor]): During training, returns the losses for the 41 | head. During testing, returns an empty dict. 42 | """ 43 | if self.training: 44 | return self._forward_train(conv_features, proposals, targets) 45 | else: 46 | return self._forward_test(conv_features, proposals) 47 | 48 | def _forward_train(self, conv_features, proposals, targets=None): 49 | # during training, only focus on positive boxes 50 | all_proposals = proposals 51 | 52 | with torch.no_grad(): 53 | proposals = self.loss_evaluator.resample(proposals, targets) 54 | 55 | x, roi_feature = self.Head(conv_features, proposals) 56 | mask_logits = self.Output(x) 57 | 58 | loss_mask = self.loss_evaluator(mask_logits) 59 | return x, all_proposals, dict(loss_mask=loss_mask) 60 | 61 | def _forward_test(self, conv_features, proposals): 62 | x, roi_feature = self.Head(conv_features, proposals) 63 | mask_logits = self.Output(x) 64 | 65 | result = self.post_processor(mask_logits, proposals) 66 | return x, result, {} 67 | -------------------------------------------------------------------------------- /rcnn/modeling/mask_rcnn/outputs.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from rcnn.modeling import registry 5 | from rcnn.core.config import cfg 6 | 7 | 8 | @registry.ROI_MASK_OUTPUTS.register("mask_deconv_output") 9 | class Mask_deconv_output(nn.Module): 10 | def __init__(self, dim_in): 11 | super(Mask_deconv_output, self).__init__() 12 | num_classes = cfg.MODEL.NUM_CLASSES 13 | 14 | self.mask_deconv = nn.ConvTranspose2d(dim_in, dim_in, 2, 2, 0) 15 | self.mask_fcn_logits = nn.Conv2d(dim_in, num_classes, 1, 1, 0) 16 | 17 | # init 18 | nn.init.kaiming_normal_(self.mask_deconv.weight, mode='fan_out', nonlinearity="relu") 19 | if self.mask_deconv.bias is not None: 20 | nn.init.zeros_(self.mask_deconv.bias) 21 | nn.init.normal_(self.mask_fcn_logits.weight, std=0.001) 22 | if self.mask_fcn_logits.bias is not None: 23 | nn.init.constant_(self.mask_fcn_logits.bias, 0) 24 | 25 | def forward(self, x): 26 | x = F.relu(self.mask_deconv(x)) 27 | return self.mask_fcn_logits(x) 28 | 29 | 30 | @registry.ROI_MASK_OUTPUTS.register("mask_logits_output") 31 | class Mask_logits_output(nn.Module): 32 | def __init__(self, dim_in): 33 | super(Mask_logits_output, self).__init__() 34 | num_classes = cfg.MODEL.NUM_CLASSES 35 | 36 | self.mask_fcn_logits = nn.Conv2d(dim_in, num_classes, 1, 1, 0) 37 | 38 | # init 39 | nn.init.normal_(self.mask_fcn_logits.weight, std=0.001) 40 | if self.mask_fcn_logits.bias is not None: 41 | 
nn.init.constant_(self.mask_fcn_logits.bias, 0) 42 | 43 | def forward(self, x): 44 | return self.mask_fcn_logits(x) 45 | -------------------------------------------------------------------------------- /rcnn/modeling/parsing_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/parsing_rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/modeling/parsing_rcnn/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .convx_heads import * 2 | from .gce_heads import * 3 | -------------------------------------------------------------------------------- /rcnn/modeling/parsing_rcnn/heads/convx_heads.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from utils.net import make_conv 5 | from rcnn.utils.poolers import Pooler 6 | from rcnn.modeling import registry 7 | from rcnn.core.config import cfg 8 | 9 | 10 | @registry.ROI_PARSING_HEADS.register("roi_convx_head") 11 | class roi_convx_head(nn.Module): 12 | def __init__(self, dim_in, spatial_scale): 13 | super(roi_convx_head, self).__init__() 14 | self.dim_in = dim_in[-1] 15 | 16 | method = cfg.PRCNN.ROI_XFORM_METHOD 17 | resolution = cfg.PRCNN.ROI_XFORM_RESOLUTION 18 | sampling_ratio = cfg.PRCNN.ROI_XFORM_SAMPLING_RATIO 19 | pooler = Pooler( 20 | method=method, 21 | output_size=resolution, 22 | scales=spatial_scale, 23 | sampling_ratio=sampling_ratio, 24 | ) 25 | self.pooler = pooler 26 | 27 | use_lite = cfg.PRCNN.CONVX_HEAD.USE_LITE 28 | use_bn = cfg.PRCNN.CONVX_HEAD.USE_BN 29 | use_gn = cfg.PRCNN.CONVX_HEAD.USE_GN 30 | conv_dim = cfg.PRCNN.CONVX_HEAD.CONV_DIM 31 | num_stacked_convs = cfg.PRCNN.CONVX_HEAD.NUM_STACKED_CONVS 32 | dilation = cfg.PRCNN.CONVX_HEAD.DILATION 33 | 34 | self.blocks = [] 35 | for layer_idx in range(num_stacked_convs): 36 | layer_name = "parsing_fcn{}".format(layer_idx + 1) 37 | module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite, 38 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite) 39 | self.add_module(layer_name, module) 40 | self.dim_in = conv_dim 41 | self.blocks.append(layer_name) 42 | self.dim_out = self.dim_in 43 | 44 | def forward(self, x, proposals): 45 | x = self.pooler(x, proposals) 46 | roi_feature = x 47 | for layer_name in self.blocks: 48 | x = F.relu(getattr(self, layer_name)(x)) 49 | 50 | return x, roi_feature 51 | -------------------------------------------------------------------------------- /rcnn/modeling/parsing_rcnn/inference.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | from utils.data.structures.bounding_box import BoxList 8 | from models.ops.misc import interpolate 9 | from rcnn.core.config import cfg 10 | 11 | 12 | # TODO check if want to return a single BoxList or a composite 13 | # object 14 | class ParsingPostProcessor(nn.Module): 15 | """ 16 | From the results of the CNN, post process the masks 17 | by taking the mask corresponding to the class with max 18 | probability (which are of fixed size and directly output 19 | by the CNN) and return the masks in the mask field of the BoxList. 
20 | If a masker object is passed, it will additionally 21 | project the masks in the image according to the locations in boxes, 22 | """ 23 | 24 | def __init__(self): 25 | super(ParsingPostProcessor, self).__init__() 26 | 27 | def forward(self, x, boxes): 28 | """ 29 | Arguments: 30 | x (Tensor): the mask logits 31 | boxes (list[BoxList]): bounding boxes that are used as 32 | reference, one for ech image 33 | Returns: 34 | results (list[BoxList]): one BoxList for each image, containing 35 | the extra field mask 36 | """ 37 | parsing_prob = x 38 | parsing_prob = F.softmax(parsing_prob, dim=1) 39 | 40 | boxes_per_image = [len(box) for box in boxes] 41 | parsing_prob = parsing_prob.split(boxes_per_image, dim=0) 42 | 43 | results = [] 44 | for prob, box in zip(parsing_prob, boxes): 45 | bbox = BoxList(box.bbox, box.size, mode="xyxy") 46 | 47 | for field in box.fields(): 48 | bbox.add_field(field, box.get_field(field)) 49 | bbox_scores = bbox.get_field("scores") 50 | bbox.add_field("parsing", prob.cpu().numpy()) 51 | bbox.add_field("parsing_scores", bbox_scores.cpu().numpy()) 52 | results.append(bbox) 53 | 54 | return results 55 | 56 | 57 | def expand_boxes(boxes, h, w): 58 | """Expand an array of boxes by a given scale.""" 59 | w_half = (boxes[:, 2] - boxes[:, 0]) * .5 60 | h_half = (boxes[:, 3] - boxes[:, 1]) * .5 61 | x_c = (boxes[:, 2] + boxes[:, 0]) * .5 62 | y_c = (boxes[:, 3] + boxes[:, 1]) * .5 63 | 64 | h_scale = (h + 2.0) / h 65 | w_scale = (w + 2.0) / w 66 | w_half *= w_scale 67 | h_half *= h_scale 68 | 69 | boxes_exp = np.zeros(boxes.shape) 70 | boxes_exp[:, 0] = x_c - w_half 71 | boxes_exp[:, 2] = x_c + w_half 72 | boxes_exp[:, 1] = y_c - h_half 73 | boxes_exp[:, 3] = y_c + h_half 74 | 75 | return boxes_exp 76 | 77 | 78 | def parsing_results(parsings, boxes, semseg=None): 79 | im_w, im_h = boxes.size 80 | parsings = parsings.transpose((0, 2, 3, 1)) 81 | boxes = boxes.bbox.numpy() 82 | H, W = parsings.shape[1:3] 83 | N = parsings.shape[3] 84 | 85 | boxes = expand_boxes(boxes, H, W) 86 | boxes = boxes.astype(np.int32) 87 | padded_parsing = np.zeros((H + 2, W + 2, N), dtype=np.float32) 88 | 89 | if semseg is not None: 90 | semseg = cv2.resize(semseg, (im_w, im_h), interpolation=cv2.INTER_LINEAR) 91 | else: 92 | semseg = np.zeros((im_h, im_w, N), dtype=np.float32) 93 | 94 | parsing_results = [] 95 | for i in range(boxes.shape[0]): 96 | padded_parsing[1:-1, 1:-1] = parsings[i] 97 | box = boxes[i, :] 98 | w = box[2] - box[0] + 1 99 | h = box[3] - box[1] + 1 100 | w = np.maximum(w, 1) 101 | h = np.maximum(h, 1) 102 | parsing = cv2.resize(padded_parsing, (w, h), interpolation=cv2.INTER_LINEAR) 103 | parsing_idx = np.argmax(parsing, axis=2) 104 | im_parsing = np.zeros((im_h, im_w), dtype=np.uint8) 105 | x_0 = max(box[0], 0) 106 | x_1 = min(box[2] + 1, im_w) 107 | y_0 = max(box[1], 0) 108 | y_1 = min(box[3] + 1, im_h) 109 | 110 | mask = np.where(parsing_idx >= 1, 1, 0) 111 | mask = mask[:, :, np.newaxis].repeat(N, axis=2) 112 | cropped_semseg = semseg[y_0:y_1, x_0:x_1] * mask[(y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])] 113 | 114 | parsing[(y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])] += \ 115 | cropped_semseg * cfg.PRCNN.SEMSEG_FUSE_WEIGHT 116 | parsing = np.argmax(parsing, axis=2) 117 | 118 | im_parsing[y_0:y_1, x_0:x_1] = parsing[(y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])] 119 | parsing_results.append(im_parsing) 120 | return parsing_results 121 | 122 | 123 | def parsing_post_processor(): 124 | parsing_post_processor = 
ParsingPostProcessor() 125 | return parsing_post_processor 126 | -------------------------------------------------------------------------------- /rcnn/modeling/parsing_rcnn/outputs.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from rcnn.modeling import registry 5 | from rcnn.core.config import cfg 6 | 7 | 8 | @registry.ROI_PARSING_OUTPUTS.register("parsing_output") 9 | class Parsing_output(nn.Module): 10 | def __init__(self, dim_in): 11 | super(Parsing_output, self).__init__() 12 | num_parsing = cfg.PRCNN.NUM_PARSING 13 | assert cfg.PRCNN.RESOLUTION[0] // cfg.PRCNN.ROI_XFORM_RESOLUTION[0] == \ 14 | cfg.PRCNN.RESOLUTION[1] // cfg.PRCNN.ROI_XFORM_RESOLUTION[1] 15 | self.up_scale = cfg.PRCNN.RESOLUTION[0] // (cfg.PRCNN.ROI_XFORM_RESOLUTION[0] * 2) 16 | 17 | deconv_kernel = 4 18 | self.parsing_score_lowres = nn.ConvTranspose2d( 19 | dim_in, 20 | num_parsing, 21 | deconv_kernel, 22 | stride=2, 23 | padding=deconv_kernel // 2 - 1, 24 | ) 25 | 26 | nn.init.kaiming_normal_(self.parsing_score_lowres.weight, mode="fan_out", nonlinearity="relu") 27 | nn.init.constant_(self.parsing_score_lowres.bias, 0) 28 | 29 | self.dim_out = num_parsing 30 | 31 | def forward(self, x): 32 | x = self.parsing_score_lowres(x) 33 | if self.up_scale > 1: 34 | x = F.interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False) 35 | 36 | return x 37 | -------------------------------------------------------------------------------- /rcnn/modeling/parsing_rcnn/parsing_rcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from rcnn.modeling.parsing_rcnn import heads 4 | from rcnn.modeling.parsing_rcnn import outputs 5 | from rcnn.modeling.parsing_rcnn.inference import parsing_post_processor 6 | from rcnn.modeling.parsing_rcnn.loss import parsing_loss_evaluator 7 | from rcnn.modeling import registry 8 | from rcnn.core.config import cfg 9 | 10 | 11 | class ParsingRCNN(torch.nn.Module): 12 | def __init__(self, dim_in, spatial_scale): 13 | super(ParsingRCNN, self).__init__() 14 | if len(cfg.PRCNN.ROI_STRIDES) == 0: 15 | self.spatial_scale = spatial_scale 16 | else: 17 | self.spatial_scale = [1. / stride for stride in cfg.PRCNN.ROI_STRIDES] 18 | 19 | head = registry.ROI_PARSING_HEADS[cfg.PRCNN.ROI_PARSING_HEAD] 20 | self.Head = head(dim_in, self.spatial_scale) 21 | output = registry.ROI_PARSING_OUTPUTS[cfg.PRCNN.ROI_PARSING_OUTPUT] 22 | self.Output = output(self.Head.dim_out) 23 | 24 | self.post_processor = parsing_post_processor() 25 | self.loss_evaluator = parsing_loss_evaluator() 26 | 27 | def forward(self, conv_features, proposals, targets=None): 28 | """ 29 | Arguments: 30 | conv_features (list[Tensor]): feature-maps from possibly several levels 31 | proposals (list[BoxList]): proposal boxes 32 | targets (list[BoxList], optional): the ground-truth targets. 33 | Returns: 34 | x (Tensor): the result of the feature extractor 35 | all_proposals (list[BoxList]): during training, the original proposals 36 | are returned. During testing, the predicted boxlists are returned 37 | with the `parsing` field set 38 | losses (dict[Tensor]): During training, returns the losses for the 39 | head. During testing, returns an empty dict. 
40 | """ 41 | if self.training: 42 | return self._forward_train(conv_features, proposals, targets) 43 | else: 44 | return self._forward_test(conv_features, proposals) 45 | 46 | def _forward_train(self, conv_features, proposals, targets=None): 47 | all_proposals = proposals 48 | with torch.no_grad(): 49 | proposals = self.loss_evaluator.resample(proposals, targets) 50 | 51 | x, roi_feature = self.Head(conv_features, proposals) 52 | parsing_logits = self.Output(x) 53 | 54 | loss_parsing = self.loss_evaluator(parsing_logits) 55 | return x, all_proposals, dict(loss_parsing=loss_parsing) 56 | 57 | def _forward_test(self, conv_features, proposals): 58 | x, roi_feature = self.Head(conv_features, proposals) 59 | parsing_logits = self.Output(x) 60 | 61 | result = self.post_processor(parsing_logits, proposals) 62 | return x, result, {} 63 | -------------------------------------------------------------------------------- /rcnn/modeling/registry.py: -------------------------------------------------------------------------------- 1 | from utils.registry import Registry 2 | 3 | 4 | """ 5 | Feature Extractor. 6 | """ 7 | # Backbone 8 | BACKBONES = Registry() 9 | 10 | # FPN 11 | FPN_BODY = Registry() 12 | 13 | 14 | """ 15 | ROI Head. 16 | """ 17 | # Box Head 18 | ROI_CLS_HEADS = Registry() 19 | ROI_CLS_OUTPUTS = Registry() 20 | ROI_BOX_HEADS = Registry() 21 | ROI_BOX_OUTPUTS = Registry() 22 | 23 | # Cascade Head 24 | ROI_CASCADE_HEADS = Registry() 25 | ROI_CASCADE_OUTPUTS = Registry() 26 | 27 | # Mask Head 28 | ROI_MASK_HEADS = Registry() 29 | ROI_MASK_OUTPUTS = Registry() 30 | 31 | # Keypoint Head 32 | ROI_KEYPOINT_HEADS = Registry() 33 | ROI_KEYPOINT_OUTPUTS = Registry() 34 | 35 | # Parsing Head 36 | ROI_PARSING_HEADS = Registry() 37 | ROI_PARSING_OUTPUTS = Registry() 38 | 39 | # UV Head 40 | ROI_UV_HEADS = Registry() 41 | ROI_UV_OUTPUTS = Registry() 42 | 43 | -------------------------------------------------------------------------------- /rcnn/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rcnn/modeling/uv_rcnn/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .convx_heads import * 2 | from .gce_heads import * 3 | -------------------------------------------------------------------------------- /rcnn/modeling/uv_rcnn/heads/convx_heads.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from utils.net import make_conv 5 | from rcnn.utils.poolers import Pooler 6 | from rcnn.modeling import registry 7 | from rcnn.core.config import cfg 8 | 9 | 10 | @registry.ROI_UV_HEADS.register("roi_convx_head") 11 | class roi_convx_head(nn.Module): 12 | def __init__(self, dim_in, spatial_scale): 13 | super(roi_convx_head, self).__init__() 14 | self.dim_in = dim_in[-1] 15 | 16 | method = cfg.UVRCNN.ROI_XFORM_METHOD 17 | resolution = cfg.UVRCNN.ROI_XFORM_RESOLUTION 18 | sampling_ratio = cfg.UVRCNN.ROI_XFORM_SAMPLING_RATIO 19 | pooler = Pooler( 20 | method=method, 21 | output_size=resolution, 22 | scales=spatial_scale, 23 | sampling_ratio=sampling_ratio, 24 | ) 25 | self.pooler = pooler 26 | 27 | use_lite = cfg.UVRCNN.CONVX_HEAD.USE_LITE 28 | use_bn = cfg.UVRCNN.CONVX_HEAD.USE_BN 29 | use_gn = cfg.UVRCNN.CONVX_HEAD.USE_GN 30 | conv_dim = cfg.UVRCNN.CONVX_HEAD.CONV_DIM 31 | num_stacked_convs = 
cfg.UVRCNN.CONVX_HEAD.NUM_STACKED_CONVS 32 | dilation = cfg.UVRCNN.CONVX_HEAD.DILATION 33 | 34 | self.blocks = [] 35 | for layer_idx in range(num_stacked_convs): 36 | layer_name = "UV_fcn{}".format(layer_idx + 1) 37 | module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite, 38 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite) 39 | self.add_module(layer_name, module) 40 | self.dim_in = conv_dim 41 | self.blocks.append(layer_name) 42 | self.dim_out = self.dim_in 43 | 44 | def forward(self, x, proposals): 45 | x = self.pooler(x, proposals) 46 | 47 | for layer_name in self.blocks: 48 | x = F.relu(getattr(self, layer_name)(x)) 49 | 50 | return x 51 | -------------------------------------------------------------------------------- /rcnn/modeling/uv_rcnn/heads/gce_heads.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from models.ops import interpolate, NonLocal2d 5 | from rcnn.core.config import cfg 6 | from rcnn.modeling import registry 7 | from rcnn.utils.poolers import Pooler 8 | from utils.net import make_conv 9 | 10 | 11 | @registry.ROI_UV_HEADS.register("roi_gce_head") 12 | class roi_gce_head(nn.Module): 13 | def __init__(self, dim_in, spatial_scale): 14 | super(roi_gce_head, self).__init__() 15 | self.dim_in = dim_in[-1] 16 | 17 | method = cfg.UVRCNN.ROI_XFORM_METHOD 18 | resolution = cfg.UVRCNN.ROI_XFORM_RESOLUTION 19 | sampling_ratio = cfg.UVRCNN.ROI_XFORM_SAMPLING_RATIO 20 | pooler = Pooler( 21 | method=method, 22 | output_size=resolution, 23 | scales=spatial_scale, 24 | sampling_ratio=sampling_ratio, 25 | ) 26 | self.pooler = pooler 27 | 28 | use_nl = cfg.UVRCNN.GCE_HEAD.USE_NL 29 | use_bn = cfg.UVRCNN.GCE_HEAD.USE_BN 30 | use_gn = cfg.UVRCNN.GCE_HEAD.USE_GN 31 | conv_dim = cfg.UVRCNN.GCE_HEAD.CONV_DIM 32 | asppv3_dim = cfg.UVRCNN.GCE_HEAD.ASPPV3_DIM 33 | num_convs_before_asppv3 = cfg.UVRCNN.GCE_HEAD.NUM_CONVS_BEFORE_ASPPV3 34 | asppv3_dilation = cfg.UVRCNN.GCE_HEAD.ASPPV3_DILATION 35 | num_convs_after_asppv3 = cfg.UVRCNN.GCE_HEAD.NUM_CONVS_AFTER_ASPPV3 36 | 37 | # convx before asppv3 module 38 | before_asppv3_list = [] 39 | for _ in range(num_convs_before_asppv3): 40 | before_asppv3_list.append( 41 | make_conv(self.dim_in, conv_dim, kernel=3, stride=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 42 | ) 43 | self.dim_in = conv_dim 44 | self.conv_before_asppv3 = nn.Sequential(*before_asppv3_list) if len(before_asppv3_list) else None 45 | 46 | # asppv3 module 47 | self.asppv3 = [] 48 | self.asppv3.append( 49 | make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 50 | ) 51 | for dilation in asppv3_dilation: 52 | self.asppv3.append( 53 | make_conv(self.dim_in, asppv3_dim, kernel=3, dilation=dilation, use_bn=use_bn, use_gn=use_gn, 54 | use_relu=True) 55 | ) 56 | self.asppv3 = nn.ModuleList(self.asppv3) 57 | self.im_pool = nn.Sequential( 58 | nn.AdaptiveAvgPool2d(1), 59 | make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 60 | ) 61 | self.dim_in = (len(asppv3_dilation) + 2) * asppv3_dim 62 | 63 | feat_list = [] 64 | feat_list.append( 65 | make_conv(self.dim_in, conv_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 66 | ) 67 | if use_nl: 68 | feat_list.append( 69 | NonLocal2d(conv_dim, int(conv_dim * cfg.UVRCNN.GCE_HEAD.NL_RATIO), conv_dim, use_gn=True) 70 | ) 71 | self.feat = nn.Sequential(*feat_list) 72 | self.dim_in = conv_dim 73 | 74 | # convx after asppv3 module 75 | 
assert num_convs_after_asppv3 >= 1 76 | after_asppv3_list = [] 77 | for _ in range(num_convs_after_asppv3): 78 | after_asppv3_list.append( 79 | make_conv(self.dim_in, conv_dim, kernel=3, use_bn=use_bn, use_gn=use_gn, use_relu=True) 80 | ) 81 | self.dim_in = conv_dim 82 | self.conv_after_asppv3 = nn.Sequential(*after_asppv3_list) if len(after_asppv3_list) else None 83 | self.dim_out = self.dim_in 84 | 85 | def forward(self, x, proposals): 86 | resolution = cfg.UVRCNN.ROI_XFORM_RESOLUTION 87 | x = self.pooler(x, proposals) 88 | 89 | if self.conv_before_asppv3 is not None: 90 | x = self.conv_before_asppv3(x) 91 | 92 | asppv3_out = [interpolate(self.im_pool(x), scale_factor=resolution, 93 | mode="bilinear", align_corners=False)] 94 | for i in range(len(self.asppv3)): 95 | asppv3_out.append(self.asppv3[i](x)) 96 | asppv3_out = torch.cat(asppv3_out, 1) 97 | asppv3_out = self.feat(asppv3_out) 98 | 99 | if self.conv_after_asppv3 is not None: 100 | x = self.conv_after_asppv3(asppv3_out) 101 | return x 102 | -------------------------------------------------------------------------------- /rcnn/modeling/uv_rcnn/inference.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | import torch 5 | from torch import nn 6 | 7 | from utils.data.structures.bounding_box import BoxList 8 | from rcnn.core.config import cfg 9 | 10 | 11 | # TODO check if want to return a single BoxList or a composite 12 | # object 13 | class UVPostProcessor(nn.Module): 14 | """ 15 | From the results of the CNN, post process the masks 16 | by taking the mask corresponding to the class with max 17 | probability (which are of fixed size and directly output 18 | by the CNN) and return the masks in the mask field of the BoxList. 
19 | 20 | If a masker object is passed, it will additionally 21 | project the masks in the image according to the locations in boxes, 22 | """ 23 | 24 | def __init__(self): 25 | super(UVPostProcessor, self).__init__() 26 | 27 | def forward(self, uv_logits, boxes): 28 | """ 29 | Arguments: 30 | uv_logits (List): the uv logits 31 | boxes (list[BoxList]): bounding boxes that are used as 32 | reference, one for ech image 33 | 34 | Returns: 35 | results (list[BoxList]): one BoxList for each image, containing 36 | the extra field mask 37 | """ 38 | UV_pred_Ann, UV_pred_Index, UV_pred_U, UV_pred_V = uv_logits 39 | 40 | boxes_per_image = [len(box) for box in boxes] 41 | UV_pred_Ann = UV_pred_Ann.split(boxes_per_image, dim=0) 42 | UV_pred_Index = UV_pred_Index.split(boxes_per_image, dim=0) 43 | UV_pred_U = UV_pred_U.split(boxes_per_image, dim=0) 44 | UV_pred_V = UV_pred_V.split(boxes_per_image, dim=0) 45 | 46 | results = [] 47 | for Ann, Index, U, V, box in zip(UV_pred_Ann, UV_pred_Index, UV_pred_U, UV_pred_V, boxes): 48 | bbox = BoxList(box.bbox, box.size, mode="xyxy") 49 | for field in box.fields(): 50 | bbox.add_field(field, box.get_field(field)) 51 | bbox.add_field("uv", [Ann.cpu().numpy(), Index.cpu().numpy(), U.cpu().numpy(), V.cpu().numpy()]) 52 | results.append(bbox) 53 | 54 | return results 55 | 56 | 57 | def uv_results(uv_logits, boxes): 58 | AnnIndex, Index_UV, U_uv, V_uv = uv_logits 59 | K = cfg.UVRCNN.NUM_PATCHES + 1 60 | boxes = boxes.bbox.numpy() 61 | uvs_results = [] 62 | for ind, entry in enumerate(boxes): 63 | # Compute ref box width and height 64 | bx = max(entry[2] - entry[0], 1) 65 | by = max(entry[3] - entry[1], 1) 66 | 67 | # preds[ind] axes are CHW; bring p axes to WHC 68 | CurAnnIndex = np.swapaxes(AnnIndex[ind], 0, 2) 69 | CurIndex_UV = np.swapaxes(Index_UV[ind], 0, 2) 70 | CurU_uv = np.swapaxes(U_uv[ind], 0, 2) 71 | CurV_uv = np.swapaxes(V_uv[ind], 0, 2) 72 | 73 | # Resize p from (HEATMAP_SIZE, HEATMAP_SIZE, c) to (int(bx), int(by), c) 74 | CurAnnIndex = cv2.resize(CurAnnIndex, (by, bx)) 75 | CurIndex_UV = cv2.resize(CurIndex_UV, (by, bx)) 76 | CurU_uv = cv2.resize(CurU_uv, (by, bx)) 77 | CurV_uv = cv2.resize(CurV_uv, (by, bx)) 78 | 79 | # Bring Cur_Preds axes back to CHW 80 | CurAnnIndex = np.swapaxes(CurAnnIndex, 0, 2) 81 | CurIndex_UV = np.swapaxes(CurIndex_UV, 0, 2) 82 | CurU_uv = np.swapaxes(CurU_uv, 0, 2) 83 | CurV_uv = np.swapaxes(CurV_uv, 0, 2) 84 | 85 | # Removed squeeze calls due to singleton dimension issues 86 | CurAnnIndex = np.argmax(CurAnnIndex, axis=0) 87 | CurIndex_UV = np.argmax(CurIndex_UV, axis=0) 88 | CurIndex_UV = CurIndex_UV * (CurAnnIndex>0).astype(np.float32) 89 | 90 | output = np.zeros([3, int(by), int(bx)], dtype=np.float32) 91 | output[0] = CurIndex_UV 92 | 93 | for part_id in range(1, K): 94 | CurrentU = CurU_uv[part_id] 95 | CurrentV = CurV_uv[part_id] 96 | output[1, CurIndex_UV==part_id] = CurrentU[CurIndex_UV==part_id] 97 | output[2, CurIndex_UV==part_id] = CurrentV[CurIndex_UV==part_id] 98 | uvs_results.append(output) 99 | return uvs_results 100 | 101 | 102 | def uv_post_processor(): 103 | uv_post_processor = UVPostProcessor() 104 | return uv_post_processor 105 | -------------------------------------------------------------------------------- /rcnn/modeling/uv_rcnn/outputs.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from rcnn.modeling import registry 5 | from rcnn.core.config import cfg 6 | 7 | 8 | 
@registry.ROI_UV_OUTPUTS.register("uv_output") 9 | class UV_output(nn.Module): 10 | def __init__(self, dim_in): 11 | super(UV_output, self).__init__() 12 | num_patches = cfg.UVRCNN.NUM_PATCHES 13 | deconv_kernel = 4 14 | assert cfg.UVRCNN.RESOLUTION[0] // cfg.UVRCNN.ROI_XFORM_RESOLUTION[0] == \ 15 | cfg.UVRCNN.RESOLUTION[1] // cfg.UVRCNN.ROI_XFORM_RESOLUTION[1] 16 | self.up_scale = cfg.UVRCNN.RESOLUTION[0] // (cfg.UVRCNN.ROI_XFORM_RESOLUTION[0] * 2) 17 | 18 | self.deconv_Ann = nn.ConvTranspose2d(dim_in, 15, deconv_kernel, 2, padding=deconv_kernel // 2 - 1) 19 | self.deconv_Index = nn.ConvTranspose2d(dim_in, num_patches + 1, deconv_kernel, 2, 20 | padding=deconv_kernel // 2 - 1) 21 | self.deconv_U = nn.ConvTranspose2d(dim_in, num_patches + 1, deconv_kernel, 2, padding=deconv_kernel // 2 - 1) 22 | self.deconv_V = nn.ConvTranspose2d(dim_in, num_patches + 1, deconv_kernel, 2, padding=deconv_kernel // 2 - 1) 23 | 24 | for m in self.modules(): 25 | if isinstance(m, nn.ConvTranspose2d): 26 | nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") 27 | nn.init.constant_(m.bias, 0) 28 | 29 | def forward(self, x): 30 | x_Ann = self.deconv_Ann(x) 31 | x_Index = self.deconv_Index(x) 32 | x_U = self.deconv_U(x) 33 | x_V = self.deconv_V(x) 34 | 35 | if self.up_scale > 1: 36 | x_Ann = F.interpolate(x_Ann, scale_factor=self.up_scale, mode="bilinear", align_corners=False) 37 | x_Index = F.interpolate(x_Index, scale_factor=self.up_scale, mode="bilinear", align_corners=False) 38 | x_U = F.interpolate(x_U, scale_factor=self.up_scale, mode="bilinear", align_corners=False) 39 | x_V = F.interpolate(x_V, scale_factor=self.up_scale, mode="bilinear", align_corners=False) 40 | 41 | return [x_Ann, x_Index, x_U, x_V] 42 | -------------------------------------------------------------------------------- /rcnn/modeling/uv_rcnn/uv_rcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from rcnn.modeling.uv_rcnn import heads 5 | from rcnn.modeling.uv_rcnn import outputs 6 | from rcnn.modeling.uv_rcnn.inference import uv_post_processor 7 | from rcnn.modeling.uv_rcnn.loss import uv_loss_evaluator 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | class UVRCNN(torch.nn.Module): 13 | def __init__(self, dim_in, spatial_scale): 14 | super(UVRCNN, self).__init__() 15 | if len(cfg.UVRCNN.ROI_STRIDES) == 0: 16 | self.spatial_scale = spatial_scale 17 | else: 18 | self.spatial_scale = [1. 
/ stride for stride in cfg.UVRCNN.ROI_STRIDES] 19 | # self.roi_batch_size = cfg.UVRCNN.ROI_BATCH_SIZE # TODO 20 | 21 | head = registry.ROI_UV_HEADS[cfg.UVRCNN.ROI_UV_HEAD] 22 | self.Head = head(dim_in, self.spatial_scale) 23 | output = registry.ROI_UV_OUTPUTS[cfg.UVRCNN.ROI_UV_OUTPUT] 24 | self.Output = output(self.Head.dim_out) 25 | 26 | self.post_processor = uv_post_processor() 27 | self.loss_evaluator = uv_loss_evaluator() 28 | 29 | def forward(self, conv_features, proposals, targets=None): 30 | if self.training: 31 | return self._forward_train(conv_features, proposals, targets) 32 | else: 33 | return self._forward_test(conv_features, proposals) 34 | 35 | def _forward_train(self, conv_features, proposals, targets=None): 36 | all_proposals = proposals 37 | with torch.no_grad(): 38 | proposals = self.loss_evaluator.resample(proposals, targets) 39 | 40 | x = self.Head(conv_features, proposals) 41 | uv_logits = self.Output(x) 42 | 43 | loss_Upoints, loss_Vpoints, loss_seg_AnnIndex, loss_IndexUVPoints = self.loss_evaluator(uv_logits) 44 | loss_dict = dict(loss_Upoints=loss_Upoints, loss_Vpoints=loss_Vpoints, 45 | loss_seg_Ann=loss_seg_AnnIndex, loss_IPoints=loss_IndexUVPoints) 46 | 47 | return x, all_proposals, loss_dict 48 | 49 | def _forward_test(self, conv_features, proposals): 50 | x = self.Head(conv_features, proposals) 51 | uv_logits = self.Output(x) 52 | 53 | result = self.post_processor(uv_logits, proposals) 54 | return x, result, {} 55 | -------------------------------------------------------------------------------- /rcnn/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import ROIAlign 2 | from .roi_align import roi_align 3 | from .roi_pool import ROIPool 4 | from .roi_pool import roi_pool 5 | from .deform_pool import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack 6 | from .deform_pool import deform_roi_pooling 7 | -------------------------------------------------------------------------------- /rcnn/ops/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from models.ops import _C 8 | 9 | from apex import amp 10 | 11 | 12 | class _ROIAlign(Function): 13 | @staticmethod 14 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned): 15 | ctx.save_for_backward(roi) 16 | ctx.output_size = _pair(output_size) 17 | ctx.spatial_scale = spatial_scale 18 | ctx.sampling_ratio = sampling_ratio 19 | ctx.input_shape = input.size() 20 | ctx.aligned = aligned 21 | output = _C.roi_align_forward( 22 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned 23 | ) 24 | return output 25 | 26 | @staticmethod 27 | @once_differentiable 28 | def backward(ctx, grad_output): 29 | rois, = ctx.saved_tensors 30 | output_size = ctx.output_size 31 | spatial_scale = ctx.spatial_scale 32 | sampling_ratio = ctx.sampling_ratio 33 | bs, ch, h, w = ctx.input_shape 34 | grad_input = _C.roi_align_backward( 35 | grad_output, rois, spatial_scale, output_size[0], output_size[1], bs, ch, h, w, sampling_ratio, ctx.aligned 36 | ) 37 | return grad_input, None, None, None, None, None 38 | 39 | 40 | roi_align = _ROIAlign.apply 41 | 42 | 43 | class ROIAlign(nn.Module): 44 | def __init__(self, output_size, spatial_scale, sampling_ratio, aligned): 45 | 
super(ROIAlign, self).__init__() 46 | self.output_size = output_size 47 | self.spatial_scale = spatial_scale 48 | self.sampling_ratio = sampling_ratio 49 | self.aligned = aligned 50 | 51 | @amp.float_function 52 | def forward(self, input, rois): 53 | return roi_align( 54 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned 55 | ) 56 | 57 | def __repr__(self): 58 | tmpstr = self.__class__.__name__ + "(" 59 | tmpstr += "output_size=" + str(self.output_size) 60 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 61 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /rcnn/ops/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from models.ops import _C 8 | 9 | from apex import amp 10 | 11 | 12 | class _ROIPool(Function): 13 | @staticmethod 14 | def forward(ctx, input, roi, output_size, spatial_scale): 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.input_shape = input.size() 18 | output, argmax = _C.roi_pool_forward( 19 | input, roi, spatial_scale, output_size[0], output_size[1] 20 | ) 21 | ctx.save_for_backward(input, roi, argmax) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | input, rois, argmax = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | bs, ch, h, w = ctx.input_shape 31 | grad_input = _C.roi_pool_backward( 32 | grad_output, input, rois, argmax, spatial_scale, output_size[0], output_size[1], bs, ch, h, w, 33 | ) 34 | return grad_input, None, None, None 35 | 36 | 37 | roi_pool = _ROIPool.apply 38 | 39 | 40 | class ROIPool(nn.Module): 41 | def __init__(self, output_size, spatial_scale): 42 | super(ROIPool, self).__init__() 43 | self.output_size = output_size 44 | self.spatial_scale = spatial_scale 45 | 46 | @amp.float_function 47 | def forward(self, input, rois): 48 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 49 | 50 | def __repr__(self): 51 | tmpstr = self.__class__.__name__ + "(" 52 | tmpstr += "output_size=" + str(self.output_size) 53 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 54 | tmpstr += ")" 55 | return tmpstr 56 | -------------------------------------------------------------------------------- /rcnn/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/utils/__init__.py -------------------------------------------------------------------------------- /rcnn/utils/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BalancedPositiveNegativeSampler(object): 5 | """ 6 | This class samples batches, ensuring that they contain a fixed proportion of positives 7 | """ 8 | 9 | def __init__(self, batch_size_per_image, positive_fraction): 10 | """ 11 | Arguments: 12 | batch_size_per_image (int): number of elements to be selected per image 13 | positive_fraction (float): percentage of positive elements per batch 14 | """ 15 | self.batch_size_per_image =
batch_size_per_image 16 | self.positive_fraction = positive_fraction 17 | 18 | def __call__(self, matched_idxs): 19 | """ 20 | Arguments: 21 | matched_idxs: list of tensors containing -1, 0 or positive values. 22 | Each tensor corresponds to a specific image. 23 | -1 values are ignored, 0 are considered as negatives and > 0 as 24 | positives. 25 | 26 | Returns: 27 | pos_idx (list[tensor]) 28 | neg_idx (list[tensor]) 29 | 30 | Returns two lists of binary masks for each image. 31 | The first list contains the positive elements that were selected, 32 | and the second list the negative examples. 33 | """ 34 | pos_idx = [] 35 | neg_idx = [] 36 | for matched_idxs_per_image in matched_idxs: 37 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 38 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 39 | 40 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 41 | # protect against not enough positive examples 42 | num_pos = min(positive.numel(), num_pos) 43 | num_neg = self.batch_size_per_image - num_pos 44 | # protect against not enough negative examples 45 | num_neg = min(negative.numel(), num_neg) 46 | 47 | # randomly select positive and negative examples 48 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 49 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 50 | 51 | pos_idx_per_image = positive[perm1] 52 | neg_idx_per_image = negative[perm2] 53 | 54 | # create binary mask from indices 55 | pos_idx_per_image_mask = torch.zeros_like( 56 | matched_idxs_per_image, dtype=torch.uint8 57 | ) 58 | neg_idx_per_image_mask = torch.zeros_like( 59 | matched_idxs_per_image, dtype=torch.uint8 60 | ) 61 | pos_idx_per_image_mask[pos_idx_per_image] = 1 62 | neg_idx_per_image_mask[neg_idx_per_image] = 1 63 | 64 | pos_idx.append(pos_idx_per_image_mask) 65 | neg_idx.append(neg_idx_per_image_mask) 66 | 67 | return pos_idx, neg_idx 68 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.13 2 | pyyaml>=3.12 3 | matplotlib 4 | opencv-python>=3.4.0 5 | setuptools 6 | Cython 7 | scipy 8 | six 9 | scikit-image 10 | Pillow>=6.1.0 11 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | """Add {PROJECT_ROOT}
to PYTHONPATH 2 | 3 | Usage: 4 | import this module before importing any project modules 5 | e.g. 6 | import _init_paths 7 | from core.config import cfg 8 | """ 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | 14 | def add_path(path): 15 | if path not in sys.path: 16 | sys.path.insert(0, path) 17 | 18 | 19 | this_dir = osp.abspath(osp.dirname(osp.dirname(__file__))) 20 | 21 | # Add the project root to PYTHONPATH 22 | lib_path = osp.join(this_dir) 23 | add_path(lib_path) 24 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import _init_paths  # pylint: disable=unused-import 5 | from utils.misc import mkdir_p, logging_rank 6 | 7 | from rcnn.core.config import cfg, merge_cfg_from_file, merge_cfg_from_list, assert_and_infer_cfg 8 | from rcnn.core.test_engine import run_inference 9 | 10 | # Parse arguments 11 | parser = argparse.ArgumentParser(description='Parsing R-CNN Model Testing') 12 | parser.add_argument('--cfg', dest='cfg_file', 13 | help='optional config file', 14 | default='./cfgs/CIHP/e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml', type=str) 15 | parser.add_argument('--gpu_id', type=str, default='0,1,2,3,4,5,6,7', help='gpu id for evaluation') 16 | parser.add_argument('--range', help='start (inclusive) and end (exclusive) indices', type=int, nargs=2) 17 | parser.add_argument('opts', help='See rcnn/core/config.py for all options', 18 | default=None, 19 | nargs=argparse.REMAINDER) 20 | args = parser.parse_args() 21 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id 22 | 23 | 24 | def main(): 25 | if len(args.gpu_id.split(',')) == 1: 26 | local_rank = int(args.gpu_id.split(',')[0]) 27 | else: 28 | local_rank = -1 29 | args.local_rank = local_rank 30 | 31 | num_gpus = len(args.gpu_id.split(',')) 32 | multi_gpu_testing = True if num_gpus > 1 else False 33 | 34 | if args.cfg_file is not None: 35 | merge_cfg_from_file(args.cfg_file) 36 | if args.opts is not None: 37 | merge_cfg_from_list(args.opts) 38 | 39 | if not os.path.isdir(os.path.join(cfg.CKPT, 'test')): 40 | mkdir_p(os.path.join(cfg.CKPT, 'test')) 41 | if cfg.VIS.ENABLED: 42 | if not os.path.exists(os.path.join(cfg.CKPT, 'vis')): 43 | mkdir_p(os.path.join(cfg.CKPT, 'vis')) 44 | 45 | assert_and_infer_cfg(make_immutable=False) 46 | args.test_net_file, _ = os.path.splitext(__file__) 47 | run_inference( 48 | args, 49 | ind_range=args.range, 50 | multi_gpu_testing=multi_gpu_testing 51 | ) 52 | 53 | 54 | if __name__ == '__main__': 55 | main() 56 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/utils/__init__.py -------------------------------------------------------------------------------- /utils/collections.py: -------------------------------------------------------------------------------- 1 | class AttrDict(dict): 2 | 3 | IMMUTABLE = '__immutable__' 4 | 5 | def __init__(self, *args, **kwargs): 6 | super(AttrDict, self).__init__(*args, **kwargs) 7 | self.__dict__[AttrDict.IMMUTABLE] = False 8 | 9 | def __getattr__(self, name): 10 | if name in self.__dict__: 11 | return self.__dict__[name] 12 | elif name in self: 13 | return self[name] 14 | else: 15 | raise AttributeError(name) 16 | 17 | def __setattr__(self, name, value): 18 | if not
self.__dict__[AttrDict.IMMUTABLE]: 19 | if name in self.__dict__: 20 | self.__dict__[name] = value 21 | else: 22 | self[name] = value 23 | else: 24 | raise AttributeError( 25 | 'Attempted to set "{}" to "{}", but AttrDict is immutable'. 26 | format(name, value) 27 | ) 28 | 29 | def immutable(self, is_immutable): 30 | """Set immutability to is_immutable and recursively apply the setting 31 | to all nested AttrDicts. 32 | """ 33 | self.__dict__[AttrDict.IMMUTABLE] = is_immutable 34 | # Recursively set immutable state 35 | for v in self.__dict__.values(): 36 | if isinstance(v, AttrDict): 37 | v.immutable(is_immutable) 38 | for v in self.values(): 39 | if isinstance(v, AttrDict): 40 | v.immutable(is_immutable) 41 | 42 | def is_immutable(self): 43 | return self.__dict__[AttrDict.IMMUTABLE] 44 | -------------------------------------------------------------------------------- /utils/comm.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains primitives for multi-gpu communication. 3 | This is useful when doing distributed training. 4 | """ 5 | 6 | import pickle 7 | import time 8 | import numpy as np 9 | 10 | import torch 11 | import torch.distributed as dist 12 | 13 | 14 | def get_world_size(): 15 | if not dist.is_available(): 16 | return 1 17 | if not dist.is_initialized(): 18 | return 1 19 | return dist.get_world_size() 20 | 21 | 22 | def get_rank(): 23 | if not dist.is_available(): 24 | return 0 25 | if not dist.is_initialized(): 26 | return 0 27 | return dist.get_rank() 28 | 29 | 30 | def is_main_process(): 31 | return get_rank() == 0 32 | 33 | 34 | def synchronize(): 35 | """ 36 | Helper function to synchronize (barrier) among all processes when 37 | using distributed training 38 | """ 39 | if not dist.is_available(): 40 | return 41 | if not dist.is_initialized(): 42 | return 43 | world_size = dist.get_world_size() 44 | if world_size == 1: 45 | return 46 | dist.barrier() 47 | 48 | 49 | def all_gather(data): 50 | """ 51 | Run all_gather on arbitrary picklable data (not necessarily tensors) 52 | Args: 53 | data: any picklable object 54 | Returns: 55 | list[data]: list of data gathered from each rank 56 | """ 57 | world_size = get_world_size() 58 | if world_size == 1: 59 | return [data] 60 | 61 | # serialized to a Tensor 62 | buffer = pickle.dumps(data) 63 | storage = torch.ByteStorage.from_buffer(buffer) 64 | tensor = torch.ByteTensor(storage).to("cuda") 65 | 66 | # obtain Tensor size of each rank 67 | local_size = torch.LongTensor([tensor.numel()]).to("cuda") 68 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)] 69 | dist.all_gather(size_list, local_size) 70 | size_list = [int(size.item()) for size in size_list] 71 | max_size = max(size_list) 72 | 73 | # receiving Tensor from all ranks 74 | # we pad the tensor because torch all_gather does not support 75 | # gathering tensors of different shapes 76 | tensor_list = [] 77 | for _ in size_list: 78 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 79 | if local_size != max_size: 80 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 81 | tensor = torch.cat((tensor, padding), dim=0) 82 | dist.all_gather(tensor_list, tensor) 83 | 84 | data_list = [] 85 | for size, tensor in zip(size_list, tensor_list): 86 | buffer = tensor.cpu().numpy().tobytes()[:size] 87 | data_list.append(pickle.loads(buffer)) 88 | 89 | return data_list 90 | 91 | 92 | def shared_random_seed(): 93 | """ 94 | Returns: 95 | int: a random number that is the same 
across all workers. 96 | If workers need a shared RNG, they can use this shared seed to 97 | create one. 98 | 99 | All workers must call this function, otherwise it will deadlock. 100 | """ 101 | ints = np.random.randint(2 ** 31) 102 | all_ints = all_gather(ints) 103 | return all_ints[0] 104 | 105 | 106 | def reduce_dict(input_dict, average=True): 107 | """ 108 | Args: 109 | input_dict (dict): all the values will be reduced 110 | average (bool): whether to do average or sum 111 | Reduce the values in the dictionary from all processes so that process with rank 112 | 0 has the averaged results. Returns a dict with the same fields as 113 | input_dict, after reduction. 114 | """ 115 | world_size = get_world_size() 116 | if world_size < 2: 117 | return input_dict 118 | with torch.no_grad(): 119 | names = [] 120 | values = [] 121 | # sort the keys so that they are consistent across processes 122 | for k in sorted(input_dict.keys()): 123 | names.append(k) 124 | values.append(input_dict[k]) 125 | values = torch.stack(values, dim=0) 126 | dist.reduce(values, dst=0) 127 | if dist.get_rank() == 0 and average: 128 | # only main process gets accumulated, so only divide by 129 | # world_size in this case 130 | values /= world_size 131 | reduced_dict = {k: v for k, v in zip(names, values)} 132 | return reduced_dict 133 | -------------------------------------------------------------------------------- /utils/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/utils/data/__init__.py -------------------------------------------------------------------------------- /utils/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | from utils.data.structures.image_list import to_image_list 2 | 3 | 4 | class BatchCollator(object): 5 | """ 6 | From a list of samples from the dataset, 7 | returns the batched images and targets. 
8 | This should be passed to the DataLoader 9 | """ 10 | 11 | def __init__(self, size_divisible=0): 12 | self.size_divisible = size_divisible 13 | 14 | def __call__(self, batch): 15 | transposed_batch = list(zip(*batch)) 16 | images = to_image_list(transposed_batch[0], self.size_divisible) 17 | targets = transposed_batch[1] 18 | img_ids = transposed_batch[2] 19 | return images, targets, img_ids 20 | -------------------------------------------------------------------------------- /utils/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco import COCODataset 2 | from .concat_dataset import ConcatDataset 3 | -------------------------------------------------------------------------------- /utils/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | import bisect 2 | 3 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 4 | 5 | 6 | class ConcatDataset(_ConcatDataset): 7 | """ 8 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 9 | method for querying the sizes of the image 10 | """ 11 | 12 | def get_idxs(self, idx): 13 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 14 | if dataset_idx == 0: 15 | sample_idx = idx 16 | else: 17 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 18 | return dataset_idx, sample_idx 19 | 20 | def get_img_info(self, idx): 21 | dataset_idx, sample_idx = self.get_idxs(idx) 22 | return self.datasets[dataset_idx].get_img_info(sample_idx) 23 | -------------------------------------------------------------------------------- /utils/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed import DistributedSampler 2 | from .repeat_factor import RepeatFactorTrainingSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | from .range_sampler import RangeSampler 6 | -------------------------------------------------------------------------------- /utils/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.distributed as dist 4 | from torch.utils.data.sampler import Sampler 5 | 6 | 7 | class DistributedSampler(Sampler): 8 | """Sampler that restricts data loading to a subset of the dataset. 9 | It is especially useful in conjunction with 10 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 11 | process can pass a DistributedSampler instance as a DataLoader sampler, 12 | and load a subset of the original dataset that is exclusive to it. 13 | .. note:: 14 | Dataset is assumed to be of constant size. 15 | Arguments: 16 | dataset: Dataset used for sampling. 17 | num_replicas (optional): Number of processes participating in 18 | distributed training. 19 | rank (optional): Rank of the current process within num_replicas. 
20 | """ 21 | 22 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 23 | if num_replicas is None: 24 | if not dist.is_available(): 25 | raise RuntimeError("Requires distributed package to be available") 26 | num_replicas = dist.get_world_size() 27 | if rank is None: 28 | if not dist.is_available(): 29 | raise RuntimeError("Requires distributed package to be available") 30 | rank = dist.get_rank() 31 | self.dataset = dataset 32 | self.num_replicas = num_replicas 33 | self.rank = rank 34 | self.epoch = 0 35 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 36 | self.total_size = self.num_samples * self.num_replicas 37 | self.shuffle = shuffle 38 | 39 | def __iter__(self): 40 | if self.shuffle: 41 | # deterministically shuffle based on epoch 42 | g = torch.Generator() 43 | g.manual_seed(self.epoch) 44 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 45 | else: 46 | indices = torch.arange(len(self.dataset)).tolist() 47 | 48 | # add extra samples to make it evenly divisible 49 | indices += indices[: (self.total_size - len(indices))] 50 | assert len(indices) == self.total_size 51 | 52 | # subsample 53 | offset = self.num_samples * self.rank 54 | indices = indices[offset : offset + self.num_samples] 55 | assert len(indices) == self.num_samples 56 | 57 | return iter(indices) 58 | 59 | def __len__(self): 60 | return self.num_samples 61 | 62 | def set_epoch(self, epoch): 63 | self.epoch = epoch 64 | -------------------------------------------------------------------------------- /utils/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.sampler import BatchSampler 2 | 3 | 4 | class IterationBasedBatchSampler(BatchSampler): 5 | """ 6 | Wraps a BatchSampler, resampling from it until 7 | a specified number of iterations have been sampled 8 | """ 9 | 10 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 11 | self.batch_sampler = batch_sampler 12 | self.num_iterations = num_iterations 13 | self.start_iter = start_iter 14 | 15 | def __iter__(self): 16 | iteration = self.start_iter 17 | while iteration <= self.num_iterations: 18 | # if the underlying sampler has a set_epoch method, like 19 | # DistributedSampler, used for making each process see 20 | # a different split of the dataset, then set it 21 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 22 | self.batch_sampler.sampler.set_epoch(iteration) 23 | for batch in self.batch_sampler: 24 | iteration += 1 25 | if iteration > self.num_iterations: 26 | break 27 | yield batch 28 | 29 | def __len__(self): 30 | return self.num_iterations 31 | -------------------------------------------------------------------------------- /utils/data/samplers/range_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data.sampler import Sampler 3 | 4 | 5 | class RangeSampler(Sampler): 6 | def __init__(self, start_ind, end_ind): 7 | self.start_ind = start_ind 8 | self.end_ind = end_ind 9 | 10 | def __iter__(self): 11 | indices = torch.arange(self.start_ind, self.end_ind).tolist() 12 | return iter(indices) 13 | 14 | def __len__(self): 15 | return self.end_ind - self.start_ind 16 | -------------------------------------------------------------------------------- /utils/data/structures/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/utils/data/structures/__init__.py -------------------------------------------------------------------------------- /utils/data/structures/image_list.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ImageList(object): 5 | """ 6 | Structure that holds a list of images (of possibly 7 | varying sizes) as a single tensor. 8 | This works by padding the images to the same size, 9 | and storing in a field the original sizes of each image 10 | """ 11 | 12 | def __init__(self, tensors, image_sizes): 13 | """ 14 | Arguments: 15 | tensors (tensor) 16 | image_sizes (list[tuple[int, int]]) 17 | """ 18 | self.tensors = tensors 19 | self.image_sizes = image_sizes 20 | 21 | def to(self, *args, **kwargs): 22 | cast_tensor = self.tensors.to(*args, **kwargs) 23 | return ImageList(cast_tensor, self.image_sizes) 24 | 25 | 26 | def to_image_list(tensors, size_divisible=0): 27 | """ 28 | tensors can be an ImageList, a torch.Tensor or 29 | an iterable of Tensors. It can't be a numpy array. 30 | When tensors is an iterable of Tensors, it pads 31 | the Tensors with zeros so that they have the same 32 | shape 33 | """ 34 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 35 | tensors = [tensors] 36 | 37 | if isinstance(tensors, ImageList): 38 | return tensors 39 | elif isinstance(tensors, torch.Tensor): 40 | # single tensor shape can be inferred 41 | if tensors.dim() == 3: 42 | tensors = tensors[None] 43 | assert tensors.dim() == 4 44 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 45 | return ImageList(tensors, image_sizes) 46 | elif isinstance(tensors, (tuple, list)): 47 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 48 | 49 | # TODO Ideally, just remove this and let the model handle arbitrary 50 | # input sizes 51 | if size_divisible > 0: 52 | import math 53 | 54 | stride = size_divisible 55 | max_size = list(max_size) 56 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 57 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 58 | max_size = tuple(max_size) 59 | 60 | batch_shape = (len(tensors),) + max_size 61 | batched_imgs = tensors[0].new(*batch_shape).zero_() 62 | for img, pad_img in zip(tensors, batched_imgs): 63 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 64 | 65 | image_sizes = [im.shape[-2:] for im in tensors] 66 | 67 | return ImageList(batched_imgs, image_sizes) 68 | else: 69 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 70 | -------------------------------------------------------------------------------- /utils/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import * 2 | -------------------------------------------------------------------------------- /utils/image.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def aspect_ratio_rel(im, aspect_ratio): 6 | """Performs width-relative aspect ratio transformation.""" 7 | im_h, im_w = im.shape[:2] 8 | im_ar_w = int(round(aspect_ratio * im_w)) 9 | im_ar = cv2.resize(im, dsize=(im_ar_w, im_h)) 10 | return im_ar 11 | 12 | 13 | def aspect_ratio_abs(im, aspect_ratio): 14 | """Performs absolute aspect ratio transformation.""" 15 | im_h, im_w = im.shape[:2] 16 | im_area = im_h * im_w 17 | 18 | im_ar_w =
np.sqrt(im_area * aspect_ratio) 19 | im_ar_h = np.sqrt(im_area / aspect_ratio) 20 | assert np.isclose(im_ar_w / im_ar_h, aspect_ratio) 21 | 22 | im_ar = cv2.resize(im, dsize=(int(im_ar_w), int(im_ar_h))) 23 | return im_ar 24 | -------------------------------------------------------------------------------- /utils/misc.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import os 3 | import logging 4 | import numpy as np 5 | from six.moves import cPickle as pickle 6 | 7 | import torch 8 | import torch.distributed as dist 9 | 10 | # Set up logging and load config options 11 | logging.basicConfig(level=logging.INFO) 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | # logging only in rank 0 16 | def logging_rank(sstr, distributed=True, local_rank=0): 17 | if distributed and local_rank == 0: 18 | logger.info(sstr) 19 | elif not distributed: 20 | logger.info(sstr) 21 | return 0 22 | 23 | 24 | def get_mean_and_std(dataset): 25 | """Compute the mean and std value of dataset.""" 26 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2) 27 | 28 | mean = torch.zeros(3) 29 | std = torch.zeros(3) 30 | logger.info('Computing mean and std..') 31 | for inputs, targets in dataloader: 32 | for i in range(3): 33 | mean[i] += inputs[:, i, :, :].mean() 34 | std[i] += inputs[:, i, :, :].std() 35 | mean.div_(len(dataset)) 36 | std.div_(len(dataset)) 37 | return mean, std 38 | 39 | 40 | def mkdir_p(path): 41 | """make dir if not exist""" 42 | try: 43 | os.makedirs(path) 44 | except OSError as exc: # Python >2.5 45 | if exc.errno == errno.EEXIST and os.path.isdir(path): 46 | pass 47 | else: 48 | raise 49 | 50 | 51 | def save_object(obj, file_name): 52 | """Save a Python object by pickling it.""" 53 | file_name = os.path.abspath(file_name) 54 | with open(file_name, 'wb') as f: 55 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 56 | 57 | 58 | def get_world_size() -> int: 59 | if not dist.is_available(): 60 | return 1 61 | if not dist.is_initialized(): 62 | return 1 63 | return dist.get_world_size() 64 | -------------------------------------------------------------------------------- /utils/optimizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from utils.misc import logging_rank 5 | 6 | 7 | class Optimizer(object): 8 | def __init__(self, model, solver, local_rank=0): 9 | self.model = model 10 | self.solver = solver 11 | self.local_rank = local_rank 12 | 13 | self.bias_params_list = [] 14 | self.gn_params_list = [] 15 | self.nonbias_params_list = [] 16 | 17 | self.params = [] 18 | self.gn_param_nameset = self.get_gn_param_nameset() 19 | 20 | def get_gn_param_nameset(self): 21 | gn_param_nameset = set() 22 | for name, module in self.model.named_modules(): 23 | if isinstance(module, nn.GroupNorm): 24 | gn_param_nameset.add(name + '.weight') 25 | gn_param_nameset.add(name + '.bias') 26 | return gn_param_nameset 27 | 28 | def get_params_list(self): 29 | for key, value in self.model.named_parameters(): 30 | if value.requires_grad: 31 | if 'bias' in key: 32 | self.bias_params_list.append(value) 33 | elif key in self.gn_param_nameset: 34 | self.gn_params_list.append(value) 35 | else: 36 | self.nonbias_params_list.append(value) 37 | else: 38 | logging_rank('{} does not need grad.'.format(key), local_rank=self.local_rank) 39 | 40 | def get_params(self): 41 | self.params += [ 42 | {'params': self.nonbias_params_list, 43 | 'lr': 0, 44 | 
'weight_decay': self.solver.WEIGHT_DECAY, 45 | 'lr_scale': 1}, 46 | {'params': self.bias_params_list, 47 | 'lr': 0 * (self.solver.BIAS_DOUBLE_LR + 1), 48 | 'weight_decay': self.solver.WEIGHT_DECAY if self.solver.BIAS_WEIGHT_DECAY else 0, 49 | 'lr_scale': self.solver.BIAS_DOUBLE_LR + 1}, 50 | {'params': self.gn_params_list, 51 | 'lr': 0, 52 | 'weight_decay': self.solver.WEIGHT_DECAY_GN * self.solver.WEIGHT_DECAY, 53 | 'lr_scale': 1} 54 | ] 55 | 56 | def build(self): 57 | assert self.solver.OPTIMIZER in ['SGD', 'RMSPROP', 'ADAM'] 58 | self.get_params_list() 59 | self.get_params() 60 | 61 | if self.solver.OPTIMIZER == 'SGD': 62 | optimizer = torch.optim.SGD( 63 | self.params, 64 | momentum=self.solver.MOMENTUM 65 | ) 66 | elif self.solver.OPTIMIZER == 'RMSPROP': 67 | optimizer = torch.optim.RMSprop( 68 | self.params, 69 | momentum=self.solver.MOMENTUM 70 | ) 71 | elif self.solver.OPTIMIZER == 'ADAM': 72 | optimizer = torch.optim.Adam( 73 | self.model.parameters(), 74 | lr=self.solver.BASE_LR 75 | ) 76 | else: 77 | optimizer = None 78 | return optimizer 79 | -------------------------------------------------------------------------------- /utils/registry.py: -------------------------------------------------------------------------------- 1 | def _register_generic(module_dict, module_name, module): 2 | assert module_name not in module_dict 3 | module_dict[module_name] = module 4 | 5 | 6 | class Registry(dict): 7 | """ 8 | A helper class for managing module registration; it extends a dictionary 9 | and provides a register function. 10 | E.g. creating a registry: 11 | some_registry = Registry({"default": default_module}) 12 | There are two ways of registering new modules: 13 | 1): the normal way is just calling the register function: 14 | def foo(): 15 | ... 16 | some_registry.register("foo_module", foo) 17 | 2): used as a decorator when declaring the module: 18 | @some_registry.register("foo_module") 19 | @some_registry.register("foo_module_nickname") 20 | def foo(): 21 | ... 22 | Accessing a module is just like using a dictionary, e.g.: 23 | f = some_registry["foo_module"] 24 | """ 25 | def __init__(self, *args, **kwargs): 26 | super(Registry, self).__init__(*args, **kwargs) 27 | 28 | def register(self, module_name, module=None): 29 | # used as function call 30 | if module is not None: 31 | _register_generic(self, module_name, module) 32 | return 33 | 34 | # used as decorator 35 | def register_fn(fn): 36 | _register_generic(self, module_name, fn) 37 | return fn 38 | 39 | return register_fn 40 | 41 | -------------------------------------------------------------------------------- /utils/subprocess.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import subprocess 4 | import numpy as np 5 | from io import IOBase 6 | from six.moves import shlex_quote 7 | from six.moves import cPickle as pickle 8 | 9 | from utils.misc import logging_rank 10 | 11 | 12 | def process_in_parallel(tag, total_range_size, binary, cfg, ckpt_path): 13 | """Run the specified binary NUM_GPUS times in parallel, each time as a 14 | subprocess that uses one GPU. The binary must accept the command line 15 | arguments `--range {start} {end}` that specify a data processing range.
16 | """ 17 | # subprocesses 18 | cfg_file = os.path.join(ckpt_path, 'test', '{}_range_config.yaml'.format(tag)) 19 | with open(cfg_file, 'w') as f: 20 | yaml.dump(cfg, stream=f) 21 | subprocess_env = os.environ.copy() 22 | processes = [] 23 | # Determine GPUs to use 24 | cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES') 25 | if cuda_visible_devices: 26 | gpu_inds = list(map(int, cuda_visible_devices.split(','))) 27 | assert -1 not in gpu_inds, \ 28 | 'Hiding GPU indices using the \'-1\' index is not supported' 29 | else: 30 | raise NotImplementedError 31 | subinds = np.array_split(range(total_range_size), len(gpu_inds)) 32 | # Run the binary in cfg.NUM_GPUS subprocesses 33 | for i, gpu_ind in enumerate(gpu_inds): 34 | start = subinds[i][0] 35 | end = subinds[i][-1] + 1 36 | subprocess_env['CUDA_VISIBLE_DEVICES'] = str(gpu_ind) 37 | cmd = ('python {binary} --range {start} {end} --cfg {cfg_file} --gpu_id {gpu_id}') 38 | cmd = cmd.format( 39 | binary=shlex_quote(binary), 40 | start=int(start), 41 | end=int(end), 42 | cfg_file=shlex_quote(cfg_file), 43 | gpu_id=str(gpu_ind), 44 | ) 45 | logging_rank('{} range command {}: {}'.format(tag, i, cmd)) 46 | if i == 0: 47 | subprocess_stdout = subprocess.PIPE 48 | else: 49 | filename = os.path.join(ckpt_path, 'test', '%s_range_%s_%s.stdout' % (tag, start, end)) 50 | subprocess_stdout = open(filename, 'w') 51 | p = subprocess.Popen( 52 | cmd, 53 | shell=True, 54 | env=subprocess_env, 55 | stdout=subprocess_stdout, 56 | stderr=subprocess.STDOUT, 57 | bufsize=1 58 | ) 59 | processes.append((i, p, start, end, subprocess_stdout)) 60 | # Log output from inference processes and collate their results 61 | outputs = [] 62 | for i, p, start, end, subprocess_stdout in processes: 63 | log_subprocess_output(i, p, ckpt_path, tag, start, end) 64 | if isinstance(subprocess_stdout, IOBase): 65 | subprocess_stdout.close() 66 | range_file = os.path.join(ckpt_path, 'test', '%s_range_%s_%s.pkl' % (tag, start, end)) 67 | range_data = pickle.load(open(range_file, 'rb')) 68 | outputs.append(range_data) 69 | return outputs 70 | 71 | 72 | def log_subprocess_output(i, p, ckpt_path, tag, start, end): 73 | """Capture the output of each subprocess and log it in the parent process. 74 | The first subprocess's output is logged in realtime. The output from the 75 | other subprocesses is buffered and then printed all at once (in order) when 76 | subprocesses finish. 
77 | """ 78 | outfile = os.path.join(ckpt_path, 'test', '%s_range_%s_%s.stdout' % (tag, start, end)) 79 | logging_rank('# ' + '-' * 76 + ' #') 80 | logging_rank('stdout of subprocess %s with range [%s, %s]' % (i, start + 1, end)) 81 | logging_rank('# ' + '-' * 76 + ' #') 82 | if i == 0: 83 | # Stream the piped stdout from the first subprocess in realtime 84 | with open(outfile, 'w') as f: 85 | for line in iter(p.stdout.readline, b''): 86 | print(line.rstrip().decode('ascii')) 87 | f.write(str(line, encoding='ascii')) 88 | p.stdout.close() 89 | ret = p.wait() 90 | else: 91 | # For subprocesses >= 1, wait and dump their log file 92 | ret = p.wait() 93 | with open(outfile, 'r') as f: 94 | print(''.join(f.readlines())) 95 | assert ret == 0, 'Range subprocess failed (exit code: {})'.format(ret) 96 | -------------------------------------------------------------------------------- /utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | class Timer(object): 5 | """A simple timer.""" 6 | 7 | def __init__(self): 8 | self.reset() 9 | 10 | def tic(self): 11 | # using time.time instead of time.clock because time.clock 12 | # does not normalize for multithreading 13 | self.start_time = time.time() 14 | 15 | def toc(self, average=True): 16 | self.diff = time.time() - self.start_time 17 | self.total_time += self.diff 18 | self.calls += 1 19 | self.average_time = self.total_time / self.calls 20 | if average: 21 | return self.average_time 22 | else: 23 | return self.diff 24 | 25 | def reset(self): 26 | self.total_time = 0. 27 | self.calls = 0 28 | self.start_time = 0. 29 | self.diff = 0. 30 | self.average_time = 0. 31 | -------------------------------------------------------------------------------- /weights/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/weights/README.md --------------------------------------------------------------------------------
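A minimal usage sketch (not a file in the repository) of the BalancedPositiveNegativeSampler defined in rcnn/utils/balanced_positive_negative_sampler.py above. It assumes the repository root is on PYTHONPATH (for example via tools/_init_paths.py), and the matched_idxs values are made up for illustration.

import torch
from rcnn.utils.balanced_positive_negative_sampler import BalancedPositiveNegativeSampler

# Keep at most 4 proposals per image, half of them positive.
sampler = BalancedPositiveNegativeSampler(batch_size_per_image=4, positive_fraction=0.5)

# One tensor per image: -1 = ignore, 0 = negative, > 0 = positive (matched class).
matched_idxs = [torch.tensor([-1, 0, 0, 3, 7, 0, 1, 0])]

pos_masks, neg_masks = sampler(matched_idxs)
print(pos_masks[0].nonzero().squeeze(1))  # indices of the sampled positive proposals
print(neg_masks[0].nonzero().squeeze(1))  # indices of the sampled negative proposals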