├── INSTALL.md
├── LICENSE
├── README.md
├── cfgs
├── CIHP
│ ├── e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml
│ └── e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml
├── DensePose_COCO
│ └── e2e_parsing_rcnn_R-50-FPN_s1x_ms.yaml
└── MHP-v2
│ ├── e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml
│ └── e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml
├── ckpts
└── README.md
├── data
├── output.png
└── parsing_rcnn.png
├── make.sh
├── models
├── __init__.py
├── imagenet
│ ├── __init__.py
│ ├── hrnet.py
│ ├── mobilenet_v1.py
│ ├── mobilenet_v2.py
│ ├── mobilenet_v3.py
│ ├── resnet.py
│ ├── resnext.py
│ ├── utils.py
│ └── vovnet.py
└── ops
│ ├── __init__.py
│ ├── adjust_smooth_l1_loss.py
│ ├── affine.py
│ ├── batch_norm.py
│ ├── bilinear_interpolation2d.py
│ ├── boxes.py
│ ├── context_block.py
│ ├── conv2d_samepadding.py
│ ├── conv2d_ws.py
│ ├── csrc
│ ├── PoolPointsInterp.h
│ ├── ROIAlign.h
│ ├── ROIPool.h
│ ├── SigmoidFocalLoss.h
│ ├── cpu
│ │ ├── ROIAlign_cpu.cpp
│ │ ├── nms_cpu.cpp
│ │ └── vision.h
│ ├── cuda
│ │ ├── PoolPointsInterp_cuda.cu
│ │ ├── ROIAlign_cuda.cu
│ │ ├── ROIPool_cuda.cu
│ │ ├── SigmoidFocalLoss_cuda.cu
│ │ ├── deform_conv_cuda.cu
│ │ ├── deform_conv_kernel_cuda.cu
│ │ ├── deform_pool_cuda.cu
│ │ ├── deform_pool_kernel_cuda.cu
│ │ ├── ml_nms.cu
│ │ ├── nms.cu
│ │ └── vision.h
│ ├── deform_conv.h
│ ├── deform_pool.h
│ ├── ml_nms.h
│ ├── nms.h
│ └── vision.cpp
│ ├── cython_bbox.c
│ ├── cython_bbox.pyx
│ ├── cython_nms.c
│ ├── cython_nms.pyx
│ ├── dcn
│ ├── __init__.py
│ ├── deform_conv_func.py
│ ├── deform_conv_module.py
│ ├── deform_pool_func.py
│ └── deform_pool_module.py
│ ├── dropblock.py
│ ├── iou_loss.py
│ ├── l2_loss.py
│ ├── l2norm.py
│ ├── label_smoothing.py
│ ├── lovasz_hinge_loss.py
│ ├── misc.py
│ ├── mixture_batchnorm.py
│ ├── nms.py
│ ├── nonlocal2d.py
│ ├── pool_points_interp.py
│ ├── scale.py
│ ├── setup_rcnn.py
│ ├── setup_ssd.py
│ ├── sigmoid_focal_loss.py
│ ├── smooth_l1_loss.py
│ └── squeeze_excitation.py
├── rcnn
├── __init__.py
├── core
│ ├── __init__.py
│ ├── config.py
│ ├── test.py
│ └── test_engine.py
├── datasets
│ ├── __init__.py
│ ├── dataset.py
│ ├── dataset_catalog.py
│ ├── evaluation.py
│ └── transform.py
├── modeling
│ ├── backbone
│ │ ├── HRNet.py
│ │ ├── MobileNet_v1.py
│ │ ├── MobileNet_v2.py
│ │ ├── MobileNet_v3.py
│ │ ├── ResNeXt.py
│ │ ├── ResNet.py
│ │ ├── VoVNet.py
│ │ └── __init__.py
│ ├── cascade_rcnn
│ │ ├── __init__.py
│ │ ├── cascade_rcnn.py
│ │ ├── heads
│ │ │ ├── __init__.py
│ │ │ ├── convfc_heads.py
│ │ │ └── mlp_heads.py
│ │ ├── inference.py
│ │ ├── loss.py
│ │ └── outputs.py
│ ├── fast_rcnn
│ │ ├── __init__.py
│ │ ├── fast_rcnn.py
│ │ ├── heads
│ │ │ ├── __init__.py
│ │ │ ├── convfc_heads.py
│ │ │ └── mlp_heads.py
│ │ ├── inference.py
│ │ ├── loss.py
│ │ └── outputs.py
│ ├── fpn
│ │ ├── FPN.py
│ │ ├── HRFPN.py
│ │ └── __init__.py
│ ├── keypoint_rcnn
│ │ ├── __init__.py
│ │ ├── heads
│ │ │ ├── __init__.py
│ │ │ ├── convx_heads.py
│ │ │ └── gce_heads.py
│ │ ├── inference.py
│ │ ├── keypoint_rcnn.py
│ │ ├── loss.py
│ │ └── outputs.py
│ ├── mask_rcnn
│ │ ├── __init__.py
│ │ ├── heads
│ │ │ ├── __init__.py
│ │ │ └── convx_heads.py
│ │ ├── inference.py
│ │ ├── loss.py
│ │ ├── mask_rcnn.py
│ │ └── outputs.py
│ ├── model_builder.py
│ ├── parsing_rcnn
│ │ ├── __init__.py
│ │ ├── heads
│ │ │ ├── __init__.py
│ │ │ ├── convx_heads.py
│ │ │ └── gce_heads.py
│ │ ├── inference.py
│ │ ├── loss.py
│ │ ├── outputs.py
│ │ └── parsing_rcnn.py
│ ├── registry.py
│ ├── rpn
│ │ ├── __init__.py
│ │ ├── anchor_generator.py
│ │ ├── inference.py
│ │ ├── loss.py
│ │ └── rpn.py
│ └── uv_rcnn
│ │ ├── heads
│ │ ├── __init__.py
│ │ ├── convx_heads.py
│ │ └── gce_heads.py
│ │ ├── inference.py
│ │ ├── loss.py
│ │ ├── outputs.py
│ │ └── uv_rcnn.py
├── ops
│ ├── __init__.py
│ ├── deform_pool.py
│ ├── roi_align.py
│ └── roi_pool.py
└── utils
│ ├── __init__.py
│ ├── balanced_positive_negative_sampler.py
│ ├── box_coder.py
│ ├── matcher.py
│ ├── misc.py
│ └── poolers.py
├── requirements.txt
├── tools
├── _init_paths.py
├── test_net.py
└── train_net.py
├── utils
├── __init__.py
├── checkpointer.py
├── collections.py
├── colormap.py
├── comm.py
├── data
│ ├── __init__.py
│ ├── collate_batch.py
│ ├── dataset_catalog.py
│ ├── datasets
│ │ ├── __init__.py
│ │ ├── coco.py
│ │ └── concat_dataset.py
│ ├── evaluation
│ │ ├── densepose_cocoeval.py
│ │ ├── densepose_methods.py
│ │ └── parsing_eval.py
│ ├── samplers
│ │ ├── __init__.py
│ │ ├── distributed.py
│ │ ├── grouped_batch_sampler.py
│ │ ├── iteration_based_batch_sampler.py
│ │ ├── range_sampler.py
│ │ └── repeat_factor.py
│ ├── structures
│ │ ├── __init__.py
│ │ ├── bounding_box.py
│ │ ├── boxlist_ops.py
│ │ ├── densepose_uv.py
│ │ ├── image_list.py
│ │ ├── keypoint.py
│ │ ├── parsing.py
│ │ └── segmentation_mask.py
│ └── transforms
│ │ ├── __init__.py
│ │ └── transforms.py
├── image.py
├── logger.py
├── lr_scheduler.py
├── measure.py
├── misc.py
├── net.py
├── optimizer.py
├── registry.py
├── subprocess.py
├── timer.py
└── vis.py
└── weights
└── README.md
/INSTALL.md:
--------------------------------------------------------------------------------
1 | ## Install
2 |
3 | ```
4 | # install pytorch 1.1 and torchvision
5 | sudo pip3 install torch==1.1 torchvision
6 |
7 | # install apex
8 | cd $INSTALL_DIR
9 | git clone https://github.com/NVIDIA/apex.git
10 | cd apex
11 | sudo python setup.py install --cuda_ext --cpp_ext
12 |
13 | # clone Parsing-R-CNN
14 | git clone https://github.com/soeaver/Parsing-R-CNN.git
15 | cd Parsing-R-CNN
16 |
17 | # install other requirements
18 | pip3 install -r requirements.txt
19 |
20 | # build ops
21 | sh make.sh
22 |
23 | # build cocoapi
24 | cd cocoapi/PythonAPI
25 | make
26 | cd ../../
27 | ln -s cocoapi/PythonAPI/pycocotools/ ./
28 | ```
29 |
30 | ## Data and Pre-train weights
31 |
32 | Make sure the files are organized in the following structure:
33 |
34 | ```
35 | ├─data
36 | │ ├─coco
37 | │ │ ├─images
38 | │ │ │ ├─train2017
39 | │ │ │ ├─val2017
40 | │ │ ├─annotations
41 | │ │ │ ├─DensePoseData
42 | │ │ │ │ ├─densepose_coco_train2017.json
43 | │ │ │ │ ├─densepose_coco_val2017.json
44 | │ │ │ │ ├─densepose_coco_test2017.json
45 | | |
46 | │ ├─CIHP
47 | │ │ ├─train_img
48 | │ │ │─train_parsing
49 | │ │ │─train_seg
50 | │ │ ├─val_img
51 | │ │ │─val_parsing
52 | │ │ │─val_seg
53 | │ │ ├─annotations
54 | │ │ │ ├─CIHP_train.json
55 | │ │ │ ├─CIHP_val.json
56 | | |
57 | │ ├─MHP-v2
58 | │ │ ├─train_img
59 | │ │ │─train_parsing
60 | │ │ │─train_seg
61 | │ │ ├─val_img
62 | │ │ │─val_parsing
63 | │ │ │─val_seg
64 | │ │ ├─annotations
65 | │ │ │ ├─MHP-v2_train.json
66 | │ │ │ ├─MHP-v2_val.json
67 | |
68 | ├─weights
69 | ├─resnet50_caffe.pth
70 | ├─resnet101_caffe.pth
71 | ├─resnext101_32x8d-8ba56ff5.pth
72 |
73 | ```
74 |
75 | - DensePose estimation uses the original COCO images.
76 | - For training and evaluating DensePose estimation with Parsing R-CNN, you need to fetch the DensePose data following the [original repo](https://github.com/facebookresearch/DensePose/blob/master/INSTALL.md#fetch-densepose-data).
77 |
78 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Yang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Parsing-R-CNN
2 |
3 | [](https://paperswithcode.com/sota/human-part-segmentation-on-cihp?p=parsing-r-cnn-for-instance-level-human)
4 |
5 | [](https://paperswithcode.com/sota/pose-estimation-on-densepose-coco?p=parsing-r-cnn-for-instance-level-human)
6 |
7 | [](https://paperswithcode.com/sota/human-part-segmentation-on-mhp-v20?p=parsing-r-cnn-for-instance-level-human)
8 |
9 | **(New!)** Official implementation of **Parsing R-CNN for Instance-Level Human Analysis (CVPR 2019)**
10 |
11 | ## Citing Parsing R-CNN
12 |
13 | If you use Parsing R-CNN, please use the following BibTeX entry.
14 |
15 | ```BibTeX
16 | @inproceedings{yang2019cvpr,
17 | title = {Parsing R-CNN for Instance-Level Human Analysis},
18 | author = {Lu Yang and Qing Song and Zhihui Wang and Ming Jiang},
19 | booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
20 | year = {2019}
21 | }
22 |
23 | ```
24 |
25 | In this repository, we release the Parsing R-CNN code in PyTorch.
26 |
27 | - Parsing R-CNN architecture:
28 | ![Parsing R-CNN architecture](data/parsing_rcnn.png)
29 |
30 | - Parsing R-CNN output:
31 | ![Parsing R-CNN output](data/output.png)
32 |
33 |
34 | ## Installation
35 | - 8 x TITAN RTX GPUs
36 | - PyTorch 1.1
37 | - Python 3.6.8
38 |
39 | Install Parsing R-CNN following [INSTALL.md](https://github.com/soeaver/Parsing-R-CNN/blob/master/INSTALL.md#install).
40 |
41 |
42 | ## Dataset
43 |
44 | You need to download the datasets and annotations in the format this repo expects:
45 |
46 | - [CIHP](https://drive.google.com/open?id=1OLBd23ufm6CU8CZmLEYMdF-x2b8mRgxV)
47 |
48 | - [MHP-v2](coming soon)
49 |
50 | - [DensePoseData](https://drive.google.com/open?id=1WiTLYVIgMyCDENXHPVEWW7qbZ-3EBjbt) (uses the original [MSCOCO2017](http://cocodataset.org/#download) images)
51 |
52 | Then follow the [data structure](https://github.com/soeaver/Parsing-R-CNN/blob/master/INSTALL.md#data-and-pre-train-weights) to train or evaluate Parsing R-CNN models.
53 |
54 |
55 | ## Results and Models
56 |
57 | **On CIHP val**
58 |
59 | | Backbone | LR | Det AP | mIoU |Parsing (APp50/APvol/PCP50) | DOWNLOAD |
60 | |------------|:----:|:------:|:----:|:--------------------------:| :-------:|
61 | | R-50-FPN | 1x | 65.8 | 52.8 | 57.2/51.2/55.4 | |
62 | | R-50-FPN | 3x | 68.7 | 56.0 | 64.1/54.1/60.7 | [GoogleDrive](https://drive.google.com/open?id=16bASrD7AoCADKzXynIgmdyzmbuzCfAUL)|
63 |
64 |
65 | **On MHP-v2 val**
66 |
67 | | Backbone | LR | Det AP | mIoU |Parsing (APp50/APvol/PCP50) | DOWNLOAD |
68 | |------------|:----:|:------:|:----:|:--------------------------:| :-------:|
69 | | R-50-FPN | 1x | 66.5 | 34.0 | 19.9/36.7/32.4 | |
70 | | R-50-FPN | 3x | 69.0 | 36.1 | 27.4/40.5/38.3 | [GoogleDrive](https://drive.google.com/open?id=1rbSNP4_DoJdNK4l6KHrthO0x4WOFgHGy)|
71 |
72 |
73 | **On DensePose_COCO val**
74 |
75 | | Backbone | LR | Det AP |UV AP (AP/AP50/AP75/APm/APl)| DOWNLOAD |
76 | |------------|:----:|:------:|:--------------------------:| :-------:|
77 | | R-50-FPN | s1x | 57.4 | 59.3/90.5/68.7/56.2/60.8 | [GoogleDrive](https://drive.google.com/open?id=1YQygKoOb5SbZWYnF7f9vEpC_NenpMhH5)|
78 |
79 |
80 | - The new GPSm metric is adopted for evaluating UV estimation
81 |
82 |
83 | **ImageNet pretrained weight**
84 |
85 | - [R-50](https://drive.google.com/open?id=1EtqFhrFTdBJNbp67effArVrTNx4q_ELr)
86 | - [R-50-GN](https://drive.google.com/open?id=1LzcVD7aADhXXY32DdtKhaY9hTXaduhlg)
87 | - [X-101-32x8d](https://drive.google.com/open?id=1c4OSVZIZtDT49B0DTC0tK3vcRgJpzR9n)
88 |
89 |
90 | ## Visualization
91 |
92 | coming soon.
93 |
94 |
95 | ## Training
96 |
97 | To train a model with 8 GPUs run:
98 | ```
99 | python -m torch.distributed.launch --nproc_per_node=8 tools/train_net.py --cfg cfgs/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml
100 | ```
101 |
102 |
103 | ## Evaluation
104 |
105 | ### Multi-GPU evaluation
106 | ```
107 | python tools/test_net.py --cfg ckpts/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml --gpu_id 0,1,2,3,4,5,6,7
108 | ```
109 |
110 | ### Single-GPU evaluation
111 | ```
112 | python tools/test_net.py --cfg ckpts/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml --gpu_id 0
113 | ```
114 |
115 |
116 | ## License
117 | Parsing-R-CNN is released under the [MIT license](https://github.com/soeaver/Parsing-R-CNN/blob/master/LICENSE).
118 |
--------------------------------------------------------------------------------
/cfgs/CIHP/e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml:
--------------------------------------------------------------------------------
1 | # bbox_AP: 65.8/92.8/73.7/3.4/46.8/68.7;
2 | # parsing: (mIoU:52.8/AP50:57.2/APvol:51.2/PCP50:55.4)
3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717]
4 | PIXEL_STDS: [1.0, 1.0, 1.0]
5 | CKPT: 'ckpts/CIHP/e2e_parsing_rcnn_R-50-FPN_1x_ms'
6 | MODEL:
7 | FPN_ON: True
8 | FASTER_ON: True
9 | PARSING_ON: True
10 | NUM_CLASSES: 2
11 | CONV1_RGB2BGR: False # caffe style
12 | BACKBONE:
13 | CONV_BODY: "resnet"
14 | RESNET: # caffe style
15 | LAYERS: (3, 4, 6, 3)
16 | RPN:
17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
18 | PRE_NMS_TOP_N_TRAIN: 2000
19 | PRE_NMS_TOP_N_TEST: 1000
20 | POST_NMS_TOP_N_TEST: 1000
21 | FPN_POST_NMS_TOP_N_TEST: 1000
22 | FAST_RCNN:
23 | ROI_XFORM_RESOLUTION: (7, 7)
24 | ROI_XFORM_SAMPLING_RATIO: 2
25 | PRCNN:
26 | ROI_PARSING_HEAD: "roi_gce_head"
27 | NUM_PARSING: 20
28 | ROI_STRIDES: [4]
29 | ROI_SIZE_PER_IMG: 16
30 | ROI_XFORM_RESOLUTION: (32, 32)
31 | ROI_XFORM_SAMPLING_RATIO: 2
32 | RESOLUTION: (128, 128)
33 | LOSS_WEIGHT: 2.0 # double loss weight
34 | GCE_HEAD:
35 | NUM_CONVS_AFTER_ASPPV3: 4
36 | USE_NL: True
37 | SOLVER:
38 | WEIGHT_DECAY: 0.0001
39 | BASE_LR: 0.02
40 | GAMMA: 0.1
41 | WARM_UP_ITERS: 500
42 | WARM_UP_FACTOR: 0.01
43 | MAX_ITER: 45000
44 | STEPS: [30000, 40000]
45 | SNAPSHOT_ITERS: 5000
46 | TRAIN:
47 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth
48 | DATASETS: ('CIHP_train', )
49 | SCALES: (512, 640, 704, 768, 800, 864)
50 | MAX_SIZE: 1400
51 | TEST:
52 | DATASETS: ('CIHP_val',)
53 | SCALE: 800
54 | MAX_SIZE: 1333
55 |
--------------------------------------------------------------------------------
/cfgs/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml:
--------------------------------------------------------------------------------
1 | # bbox_AP: 68.7/93.0/76.2/2.0/48.0/71.8;
2 | # parsing: (mIoU:56.0/AP50:64.1/APvol:54.1/PCP50:60.7)
3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717]
4 | PIXEL_STDS: [1.0, 1.0, 1.0]
5 | CKPT: 'ckpts/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms'
6 | MODEL:
7 | FPN_ON: True
8 | FASTER_ON: True
9 | PARSING_ON: True
10 | NUM_CLASSES: 2
11 | CONV1_RGB2BGR: False # caffe style
12 | BACKBONE:
13 | CONV_BODY: "resnet"
14 | RESNET: # caffe style
15 | LAYERS: (3, 4, 6, 3)
16 | RPN:
17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
18 | PRE_NMS_TOP_N_TRAIN: 2000
19 | PRE_NMS_TOP_N_TEST: 1000
20 | POST_NMS_TOP_N_TEST: 1000
21 | FPN_POST_NMS_TOP_N_TEST: 1000
22 | FAST_RCNN:
23 | ROI_XFORM_RESOLUTION: (7, 7)
24 | ROI_XFORM_SAMPLING_RATIO: 2
25 | PRCNN:
26 | ROI_PARSING_HEAD: "roi_gce_head"
27 | NUM_PARSING: 20
28 | ROI_STRIDES: [4]
29 | ROI_SIZE_PER_IMG: 16
30 | ROI_XFORM_RESOLUTION: (32, 32)
31 | ROI_XFORM_SAMPLING_RATIO: 2
32 | RESOLUTION: (128, 128)
33 | LOSS_WEIGHT: 2.0 # double loss weight
34 | GCE_HEAD:
35 | NUM_CONVS_AFTER_ASPPV3: 4
36 | USE_NL: True
37 | SOLVER:
38 | WEIGHT_DECAY: 0.0001
39 | BASE_LR: 0.02
40 | GAMMA: 0.1
41 | WARM_UP_ITERS: 500
42 | WARM_UP_FACTOR: 0.01
43 | MAX_ITER: 135000
44 | STEPS: [105000, 125000]
45 | TRAIN:
46 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth
47 | DATASETS: ('CIHP_train', )
48 | SCALES: (512, 640, 704, 768, 800, 864)
49 | MAX_SIZE: 1400
50 | TEST:
51 | DATASETS: ('CIHP_val',)
52 | SCALE: 800
53 | MAX_SIZE: 1333
54 |
--------------------------------------------------------------------------------
/cfgs/DensePose_COCO/e2e_parsing_rcnn_R-50-FPN_s1x_ms.yaml:
--------------------------------------------------------------------------------
1 | # bbox_AP: 57.4/87.6/62.7/30.3/56.2/70.2;
2 | # uv_AP (GPSm): 59.3/90.5/68.7/52.6/60.8;
3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717]
4 | PIXEL_STDS: [1.0, 1.0, 1.0]
5 | CKPT: 'ckpts/DensePose_COCO/e2e_parsing_rcnn_R-50-FPN_s1x_ms'
6 | MODEL:
7 | FPN_ON: True
8 | FASTER_ON: True
9 | UV_ON: True
10 | NUM_CLASSES: 2
11 | CONV1_RGB2BGR: False # caffe style
12 | BACKBONE:
13 | CONV_BODY: "resnet"
14 | RESNET: # caffe style
15 | LAYERS: (3, 4, 6, 3)
16 | RPN:
17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
18 | PRE_NMS_TOP_N_TRAIN: 2000
19 | PRE_NMS_TOP_N_TEST: 1000
20 | POST_NMS_TOP_N_TEST: 1000
21 | FPN_POST_NMS_TOP_N_TEST: 1000
22 | FAST_RCNN:
23 | ROI_XFORM_RESOLUTION: (7, 7)
24 | ROI_XFORM_SAMPLING_RATIO: 2
25 | UVRCNN:
26 | ROI_UV_HEAD: "roi_gce_head"
27 | ROI_STRIDES: [4]
28 | ROI_SIZE_PER_IMG: 32
29 | ROI_XFORM_RESOLUTION: (32, 32)
30 | ROI_XFORM_SAMPLING_RATIO: 2
31 | RESOLUTION: (128, 128)
32 | INDEX_WEIGHTS : 2.0
33 | PART_WEIGHTS : 0.3
34 | POINT_REGRESSION_WEIGHTS : 0.1
35 | GCE_HEAD:
36 | NUM_CONVS_AFTER_ASPPV3: 4
37 | USE_NL: True
38 | SOLVER:
39 | WEIGHT_DECAY: 0.0001
40 | BASE_LR: 0.002
41 | GAMMA: 0.1
42 | WARM_UP_ITERS: 500
43 | MAX_ITER: 130000
44 | STEPS: [100000, 120000]
45 | TRAIN:
46 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth
47 | DATASETS: ('dense_coco_2017_train', )
48 | SCALES: (512, 640, 704, 768, 800, 864)
49 | MAX_SIZE: 1400
50 | TEST:
51 | DATASETS: ('dense_coco_2017_val',)
52 | SCALE: 800
53 | MAX_SIZE: 1333
54 |
--------------------------------------------------------------------------------
/cfgs/MHP-v2/e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml:
--------------------------------------------------------------------------------
1 | # bbox_AP: 66.5/93.8/76.8/-1.0/52.0/66.7;
2 | # parsing: (mIoU:34.0/AP50:19.9/APvol:37.6/PCP50:32.4)
3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717]
4 | PIXEL_STDS: [1.0, 1.0, 1.0]
5 | CKPT: 'ckpts/MHP-v2/e2e_parsing_rcnn_R-50-FPN_1x_ms'
6 | MODEL:
7 | FPN_ON: True
8 | FASTER_ON: True
9 | PARSING_ON: True
10 | NUM_CLASSES: 2
11 | CONV1_RGB2BGR: False # caffe style
12 | BACKBONE:
13 | CONV_BODY: "resnet"
14 | RESNET: # caffe style
15 | LAYERS: (3, 4, 6, 3)
16 | RPN:
17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
18 | PRE_NMS_TOP_N_TRAIN: 2000
19 | PRE_NMS_TOP_N_TEST: 1000
20 | POST_NMS_TOP_N_TEST: 1000
21 | FPN_POST_NMS_TOP_N_TEST: 1000
22 | FAST_RCNN:
23 | ROI_XFORM_RESOLUTION: (7, 7)
24 | ROI_XFORM_SAMPLING_RATIO: 2
25 | PRCNN:
26 | ROI_PARSING_HEAD: "roi_gce_head"
27 | NUM_PARSING: 59
28 | ROI_STRIDES: [4]
29 | ROI_SIZE_PER_IMG: 16
30 | ROI_XFORM_RESOLUTION: (32, 32)
31 | ROI_XFORM_SAMPLING_RATIO: 2
32 | RESOLUTION: (128, 128)
33 | SEMSEG_SCORE_THRESH: 0.05
34 | LOSS_WEIGHT: 2.0 # double loss weight
35 | GCE_HEAD:
36 | NUM_CONVS_AFTER_ASPPV3: 4
37 | USE_NL: True
38 | SOLVER:
39 | WEIGHT_DECAY: 0.0001
40 | BASE_LR: 0.02
41 | GAMMA: 0.1
42 | WARM_UP_ITERS: 500
43 | WARM_UP_FACTOR: 0.01
44 | MAX_ITER: 24000
45 | STEPS: [15000, 20000]
46 | SNAPSHOT_ITERS: 5000
47 | TRAIN:
48 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth
49 | DATASETS: ('MHP-v2_train', )
50 | SCALES: (512, 640, 704, 768, 800, 864)
51 | MAX_SIZE: 1400
52 | TEST:
53 | DATASETS: ('MHP-v2_val',)
54 | SCALE: 800
55 | MAX_SIZE: 1333
56 |
--------------------------------------------------------------------------------
/cfgs/MHP-v2/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml:
--------------------------------------------------------------------------------
1 | # bbox_AP: 69.0/94.1/78.8/-1.0/56.7/69.1;
2 | # parsing: (mIoU:36.1/AP50:27.4/APvol:40.5/PCP50:38.3)
3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717]
4 | PIXEL_STDS: [1.0, 1.0, 1.0]
5 | CKPT: 'ckpts/MHP-v2/e2e_parsing_rcnn_R-50-FPN_3x_ms'
6 | MODEL:
7 | FPN_ON: True
8 | FASTER_ON: True
9 | PARSING_ON: True
10 | NUM_CLASSES: 2
11 | CONV1_RGB2BGR: False # caffe style
12 | BACKBONE:
13 | CONV_BODY: "resnet"
14 | RESNET: # caffe style
15 | LAYERS: (3, 4, 6, 3)
16 | RPN:
17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
18 | PRE_NMS_TOP_N_TRAIN: 2000
19 | PRE_NMS_TOP_N_TEST: 1000
20 | POST_NMS_TOP_N_TEST: 1000
21 | FPN_POST_NMS_TOP_N_TEST: 1000
22 | FAST_RCNN:
23 | ROI_XFORM_RESOLUTION: (7, 7)
24 | ROI_XFORM_SAMPLING_RATIO: 2
25 | PRCNN:
26 | ROI_PARSING_HEAD: "roi_gce_head"
27 | NUM_PARSING: 59
28 | ROI_STRIDES: [4]
29 | ROI_SIZE_PER_IMG: 16
30 | ROI_XFORM_RESOLUTION: (32, 32)
31 | ROI_XFORM_SAMPLING_RATIO: 2
32 | RESOLUTION: (128, 128)
33 | SEMSEG_SCORE_THRESH: 0.05
34 | LOSS_WEIGHT: 2.0 # double loss weight
35 | GCE_HEAD:
36 | NUM_CONVS_AFTER_ASPPV3: 4
37 | USE_NL: True
38 | SOLVER:
39 | WEIGHT_DECAY: 0.0001
40 | BASE_LR: 0.02
41 | GAMMA: 0.1
42 | WARM_UP_ITERS: 500
43 | WARM_UP_FACTOR: 0.01
44 | MAX_ITER: 72000
45 | STEPS: [54000, 64000]
46 | TRAIN:
47 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth
48 | DATASETS: ('MHP-v2_train', )
49 | SCALES: (512, 640, 704, 768, 800, 864)
50 | MAX_SIZE: 1400
51 | TEST:
52 | DATASETS: ('MHP-v2_val',)
53 | SCALE: 800
54 | MAX_SIZE: 1333
55 |
--------------------------------------------------------------------------------
/ckpts/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/ckpts/README.md
--------------------------------------------------------------------------------
/data/output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/data/output.png
--------------------------------------------------------------------------------
/data/parsing_rcnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/data/parsing_rcnn.png
--------------------------------------------------------------------------------
/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # export CXXFLAGS="-std=c++11"
4 | # export CFLAGS="-std=c99"
5 |
6 | PYTHON=${PYTHON:-"python"}
7 | cd models/ops
8 |
9 | echo "Building bbox op..."
10 | $PYTHON setup_ssd.py build_ext --inplace
11 | rm -rf build
12 |
13 | echo "Building rcnn op..."
14 | if [ -d "build" ]; then
15 | rm -r build
16 | fi
17 | $PYTHON setup_rcnn.py build_ext --inplace
18 | rm -r build
19 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/models/imagenet/__init__.py:
--------------------------------------------------------------------------------
1 | from .hrnet import *
2 | from .mobilenet_v1 import *
3 | from .mobilenet_v2 import *
4 | from .mobilenet_v3 import *
5 | from .resnet import *
6 | from .resnext import *
7 | from .vovnet import *
8 |
--------------------------------------------------------------------------------
/models/ops/__init__.py:
--------------------------------------------------------------------------------
1 | from .batch_norm import FrozenBatchNorm2d, NaiveSyncBatchNorm
2 | from .misc import Conv2d, ConvTranspose2d, BatchNorm2d, interpolate
3 | from .nms import nms, ml_nms
4 | from .l2_loss import l2_loss
5 | from .iou_loss import IOULoss
6 | from .scale import Scale
7 | from .smooth_l1_loss import smooth_l1_loss, smooth_l1_loss_LW
8 | from .adjust_smooth_l1_loss import AdjustSmoothL1Loss
9 | from .sigmoid_focal_loss import SigmoidFocalLoss
10 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv
11 | from .dcn.deform_conv_module import DeformConv, DeformConvPack, ModulatedDeformConv, ModulatedDeformConvPack
12 | from .dcn.deform_pool_func import deform_roi_pooling
13 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack
14 | from .affine import AffineChannel2d
15 | from .bilinear_interpolation2d import BilinearInterpolation2d
16 | from .conv2d_samepadding import Conv2dSamePadding
17 | from .conv2d_ws import Conv2dWS
18 | from .dropblock import DropBlock2D
19 | from .l2norm import L2Norm
20 | from .label_smoothing import LabelSmoothing
21 | from .nonlocal2d import NonLocal2d, MS_NonLocal2d
22 | from .squeeze_excitation import SeConv2d, GDWSe2d
23 | from .pool_points_interp import PoolPointsInterp
24 | from .context_block import GlobalContextBlock
25 | from .mixture_batchnorm import MixtureBatchNorm2d, MixtureGroupNorm
26 | from .lovasz_hinge_loss import LovaszHinge
27 |
--------------------------------------------------------------------------------
/models/ops/adjust_smooth_l1_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 |
5 | class AdjustSmoothL1Loss(nn.Module):
6 |
7 | def __init__(self, num_features, momentum=0.1, beta=1. /9):
8 | super(AdjustSmoothL1Loss, self).__init__()
9 | self.num_features = num_features
10 | self.momentum = momentum
11 | self.beta = beta
12 | self.register_buffer(
13 | 'running_mean', torch.empty(num_features).fill_(beta)
14 | )
15 | self.register_buffer('running_var', torch.zeros(num_features))
16 |
17 | def forward(self, inputs, target, size_average=True):
18 |
19 |         n = torch.abs(inputs - target)
20 | with torch.no_grad():
21 | if torch.isnan(n.var(dim=0)).sum().item() == 0:
22 | self.running_mean = self.running_mean.to(n.device)
23 | self.running_mean *= (1 - self.momentum)
24 | self.running_mean += (self.momentum * n.mean(dim=0))
25 | self.running_var = self.running_var.to(n.device)
26 | self.running_var *= (1 - self.momentum)
27 | self.running_var += (self.momentum * n.var(dim=0))
28 |
29 |
30 | beta = (self.running_mean - self.running_var)
31 | beta = beta.clamp(max=self.beta, min=1e-3)
32 |
33 | beta = beta.view(-1, self.num_features).to(n.device)
34 | cond = n < beta.expand_as(n)
35 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
36 | if size_average:
37 | return loss.mean()
38 | return loss.sum()
39 |
40 |
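A minimal usage sketch of `AdjustSmoothL1Loss` (hypothetical snippet; assumes the repo root is on `PYTHONPATH` and `make.sh` has been run so that `models.ops` can import its compiled extensions):

```python
import torch

from models.ops import AdjustSmoothL1Loss  # hypothetical usage; repo root on PYTHONPATH, ops built via make.sh

# One running beta per box-regression target; beta adapts to the statistics of |input - target|.
criterion = AdjustSmoothL1Loss(num_features=4, beta=1. / 9)
pred = torch.randn(8, 4, requires_grad=True)
target = torch.randn(8, 4)
loss = criterion(pred, target)  # mean over all elements by default (size_average=True)
loss.backward()
```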
--------------------------------------------------------------------------------
/models/ops/affine.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class AffineChannel2d(nn.Module):
6 | """ A simple channel-wise affine transformation operation """
7 | def __init__(self, num_features):
8 | super().__init__()
9 | self.num_features = num_features
10 | self.weight = nn.Parameter(torch.Tensor(num_features))
11 | self.bias = nn.Parameter(torch.Tensor(num_features))
12 | self.weight.data.uniform_()
13 | self.bias.data.zero_()
14 |
15 | def forward(self, x):
16 | return x * self.weight.view(1, self.num_features, 1, 1) + \
17 | self.bias.view(1, self.num_features, 1, 1)
18 |
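A minimal usage sketch of `AffineChannel2d` (hypothetical snippet; assumes the repo root is on `PYTHONPATH` and the ops extensions are built):

```python
import torch

from models.ops import AffineChannel2d  # hypothetical usage; repo root on PYTHONPATH, ops built via make.sh

aff = AffineChannel2d(num_features=64)
x = torch.randn(2, 64, 56, 56)
y = aff(x)  # per-channel scale and shift; output shape equals input shape
assert y.shape == x.shape
```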
--------------------------------------------------------------------------------
/models/ops/batch_norm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.distributed as dist
3 | from torch import nn
4 | from torch.autograd.function import Function
5 |
6 | from utils.misc import get_world_size
7 |
8 |
9 | class FrozenBatchNorm2d(nn.Module):
10 | """
11 | BatchNorm2d where the batch statistics and the affine parameters
12 | are fixed
13 | """
14 |
15 | def __init__(self, n):
16 | super(FrozenBatchNorm2d, self).__init__()
17 | self.register_buffer("weight", torch.ones(n))
18 | self.register_buffer("bias", torch.zeros(n))
19 | self.register_buffer("running_mean", torch.zeros(n))
20 | self.register_buffer("running_var", torch.ones(n))
21 |
22 | def forward(self, x):
23 | # Cast all fixed parameters to half() if necessary
24 | if x.dtype == torch.float16:
25 | self.weight = self.weight.half()
26 | self.bias = self.bias.half()
27 | self.running_mean = self.running_mean.half()
28 | self.running_var = self.running_var.half()
29 |
30 | scale = self.weight * self.running_var.rsqrt()
31 | bias = self.bias - self.running_mean * scale
32 | scale = scale.reshape(1, -1, 1, 1)
33 | bias = bias.reshape(1, -1, 1, 1)
34 | return x * scale + bias
35 |
36 |
37 | class AllReduce(Function):
38 | @staticmethod
39 | def forward(ctx, input):
40 | input_list = [torch.zeros_like(input) for k in range(dist.get_world_size())]
41 | # Use allgather instead of allreduce since I don't trust in-place operations ..
42 | dist.all_gather(input_list, input, async_op=False)
43 | inputs = torch.stack(input_list, dim=0)
44 | return torch.sum(inputs, dim=0)
45 |
46 | @staticmethod
47 | def backward(ctx, grad_output):
48 | dist.all_reduce(grad_output, async_op=False)
49 | return grad_output
50 |
51 |
52 | class NaiveSyncBatchNorm(nn.BatchNorm2d):
53 | """
54 | This function is taken from the detectron2 repo.
55 | It can be seen here:
56 | https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/batch_norm.py
57 |
58 | `torch.nn.SyncBatchNorm` has known unknown bugs.
59 | It produces significantly worse AP (and sometimes goes NaN)
60 | when the batch size on each worker is quite different
61 | (e.g., when scale augmentation is used, or when it is applied to mask head).
62 | Use this implementation before `nn.SyncBatchNorm` is fixed.
63 | It is slower than `nn.SyncBatchNorm`.
64 | """
65 |
66 | def forward(self, input):
67 | if get_world_size() == 1 or not self.training:
68 | return super().forward(input)
69 |
70 | assert input.shape[0] > 0, "SyncBatchNorm does not support empty inputs"
71 | C = input.shape[1]
72 | mean = torch.mean(input, dim=[0, 2, 3])
73 | meansqr = torch.mean(input * input, dim=[0, 2, 3])
74 |
75 | vec = torch.cat([mean, meansqr], dim=0)
76 | vec = AllReduce.apply(vec) * (1.0 / dist.get_world_size())
77 |
78 | mean, meansqr = torch.split(vec, C)
79 | var = meansqr - mean * mean
80 | self.running_mean += self.momentum * (mean.detach() - self.running_mean)
81 | self.running_var += self.momentum * (var.detach() - self.running_var)
82 |
83 | invstd = torch.rsqrt(var + self.eps)
84 | scale = self.weight * invstd
85 | bias = self.bias - mean * scale
86 | scale = scale.reshape(1, -1, 1, 1)
87 | bias = bias.reshape(1, -1, 1, 1)
88 | return input * scale + bias
89 |
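A minimal usage sketch of `FrozenBatchNorm2d` (hypothetical snippet; assumes the repo root is on `PYTHONPATH` and the ops extensions are built). `NaiveSyncBatchNorm` is used the same way but only differs from `nn.BatchNorm2d` when training under `torch.distributed`:

```python
import torch

from models.ops import FrozenBatchNorm2d  # hypothetical usage; repo root on PYTHONPATH, ops built via make.sh

bn = FrozenBatchNorm2d(32)  # weight/bias/running stats are buffers and are never updated by training
x = torch.randn(4, 32, 14, 14)
out = bn(x)                 # equivalent to a fixed per-channel affine transform
```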
--------------------------------------------------------------------------------
/models/ops/bilinear_interpolation2d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 |
8 | class BilinearInterpolation2d(nn.Module):
9 | """Bilinear interpolation in space of scale.
10 |
11 | Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s:= up_scale
12 |
13 | Adapted from the CVPR'15 FCN code.
14 | See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py
15 | """
16 | def __init__(self, in_channels, out_channels, up_scale):
17 | super().__init__()
18 | assert in_channels == out_channels
19 | assert up_scale % 2 == 0, 'Scale should be even'
20 | self.in_channes = in_channels
21 | self.out_channels = out_channels
22 | self.up_scale = int(up_scale)
23 | self.padding = up_scale // 2
24 |
25 | def upsample_filt(size):
26 | factor = (size + 1) // 2
27 | if size % 2 == 1:
28 | center = factor - 1
29 | else:
30 | center = factor - 0.5
31 | og = np.ogrid[:size, :size]
32 | return ((1 - abs(og[0] - center) / factor) *
33 | (1 - abs(og[1] - center) / factor))
34 |
35 | kernel_size = up_scale * 2
36 | bil_filt = upsample_filt(kernel_size)
37 |
38 | kernel = np.zeros(
39 | (in_channels, out_channels, kernel_size, kernel_size), dtype=np.float32
40 | )
41 | kernel[range(in_channels), range(out_channels), :, :] = bil_filt
42 |
43 | self.upconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size,
44 | stride=self.up_scale, padding=self.padding)
45 |
46 | self.upconv.weight.data.copy_(torch.from_numpy(kernel))
47 | self.upconv.bias.data.fill_(0)
48 | self.upconv.weight.requires_grad = False
49 | self.upconv.bias.requires_grad = False
50 |
51 | def forward(self, x):
52 | return self.upconv(x)
53 |
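A minimal usage sketch of `BilinearInterpolation2d` (hypothetical snippet; assumes the repo root is on `PYTHONPATH` and the ops extensions are built):

```python
import torch

from models.ops import BilinearInterpolation2d  # hypothetical usage; repo root on PYTHONPATH, ops built via make.sh

# Fixed (non-learned) bilinear upsampling; up_scale must be even and in/out channels must match.
up = BilinearInterpolation2d(in_channels=17, out_channels=17, up_scale=2)
x = torch.randn(1, 17, 64, 64)
y = up(x)  # 64x64 -> 128x128
```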
--------------------------------------------------------------------------------
/models/ops/context_block.py:
--------------------------------------------------------------------------------
1 | """
2 | Creates a GCB Model as defined in:
3 | Yue Cao, Jiarui Xu, Stephen Lin, Fangyun Wei, Han Hu. (2019 Arxiv).
4 | GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond.
5 | Copyright (c) Yang Lu, 2019
6 | """
7 | import torch
8 | from torch import nn
9 |
10 |
11 | def constant_init(module, val, bias=0):
12 | nn.init.constant_(module.weight, val)
13 | if hasattr(module, 'bias') and module.bias is not None:
14 | nn.init.constant_(module.bias, bias)
15 |
16 |
17 | def last_zero_init(m):
18 | if isinstance(m, nn.Sequential):
19 | constant_init(m[-1], val=0)
20 | else:
21 | constant_init(m, val=0)
22 |
23 |
24 | class GlobalContextBlock(nn.Module):
25 | def __init__(self, inplanes, innerplanse, pooling_type='att', fusion_types=('channel_add', )):
26 | super(GlobalContextBlock, self).__init__()
27 | assert pooling_type in ['avg', 'att']
28 | assert isinstance(fusion_types, (list, tuple))
29 | valid_fusion_types = ['channel_add', 'channel_mul']
30 | assert all([f in valid_fusion_types for f in fusion_types])
31 | assert len(fusion_types) > 0, 'at least one fusion should be used'
32 | self.inplanes = inplanes
33 | self.innerplanse = innerplanse
34 | self.pooling_type = pooling_type
35 | self.fusion_types = fusion_types
36 | if pooling_type == 'att':
37 | self.conv_mask = nn.Conv2d(inplanes, 1, kernel_size=1)
38 | self.softmax = nn.Softmax(dim=2)
39 | else:
40 | self.avg_pool = nn.AdaptiveAvgPool2d(1)
41 | if 'channel_add' in fusion_types:
42 | self.channel_add_conv = nn.Sequential(
43 | nn.Conv2d(self.inplanes, self.innerplanse, kernel_size=1),
44 | nn.LayerNorm([self.innerplanse, 1, 1]),
45 | nn.ReLU(inplace=True),
46 | nn.Conv2d(self.innerplanse, self.inplanes, kernel_size=1)
47 | )
48 | else:
49 | self.channel_add_conv = None
50 | if 'channel_mul' in fusion_types:
51 | self.channel_mul_conv = nn.Sequential(
52 | nn.Conv2d(self.inplanes, self.innerplanse, kernel_size=1),
53 | nn.LayerNorm([self.innerplanse, 1, 1]),
54 | nn.ReLU(inplace=True),
55 | nn.Conv2d(self.innerplanse, self.inplanes, kernel_size=1)
56 | )
57 | else:
58 | self.channel_mul_conv = None
59 | self.reset_parameters()
60 |
61 | def reset_parameters(self):
62 | if self.pooling_type == 'att':
63 | nn.init.kaiming_normal_(self.conv_mask.weight, mode='fan_in', nonlinearity='relu')
64 | self.conv_mask.inited = True
65 |
66 | if self.channel_add_conv is not None:
67 | last_zero_init(self.channel_add_conv)
68 | if self.channel_mul_conv is not None:
69 | last_zero_init(self.channel_mul_conv)
70 |
71 | def spatial_pool(self, x):
72 | batch, channel, height, width = x.size()
73 | if self.pooling_type == 'att':
74 | input_x = x
75 | # [N, C, H * W]
76 | input_x = input_x.view(batch, channel, height * width)
77 | # [N, 1, C, H * W]
78 | input_x = input_x.unsqueeze(1)
79 | # [N, 1, H, W]
80 | context_mask = self.conv_mask(x)
81 | # [N, 1, H * W]
82 | context_mask = context_mask.view(batch, 1, height * width)
83 | # [N, 1, H * W]
84 | context_mask = self.softmax(context_mask)
85 | # [N, 1, H * W, 1]
86 | context_mask = context_mask.unsqueeze(-1)
87 | # [N, 1, C, 1]
88 | context = torch.matmul(input_x, context_mask)
89 | # [N, C, 1, 1]
90 | context = context.view(batch, channel, 1, 1)
91 | else:
92 | # [N, C, 1, 1]
93 | context = self.avg_pool(x)
94 |
95 | return context
96 |
97 | def forward(self, x):
98 | # [N, C, 1, 1]
99 | context = self.spatial_pool(x)
100 |
101 | out = x
102 | if self.channel_mul_conv is not None:
103 | # [N, C, 1, 1]
104 | channel_mul_term = torch.sigmoid(self.channel_mul_conv(context))
105 | out = out * channel_mul_term
106 | if self.channel_add_conv is not None:
107 | # [N, C, 1, 1]
108 | channel_add_term = self.channel_add_conv(context)
109 | out = out + channel_add_term
110 |
111 | return out
112 |
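A minimal usage sketch of `GlobalContextBlock` (hypothetical snippet; the bottleneck width passed as `innerplanse` is illustrative, and the repo root is assumed to be on `PYTHONPATH` with the ops extensions built):

```python
import torch

from models.ops import GlobalContextBlock  # hypothetical usage; repo root on PYTHONPATH, ops built via make.sh

# Attention pooling plus the default channel-add fusion branch.
gcb = GlobalContextBlock(inplanes=256, innerplanse=64)
x = torch.randn(2, 256, 32, 32)
y = gcb(x)  # same shape as x, with the aggregated global context added back
```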
--------------------------------------------------------------------------------
/models/ops/conv2d_samepadding.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | from torch import nn
4 | from torch.nn import functional as F
5 |
6 |
7 | class Conv2dSamePadding(nn.Conv2d):
8 | """ 2D Convolutions like TensorFlow """
9 |
10 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
11 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
12 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
13 |
14 | def forward(self, x):
15 | ih, iw = x.size()[-2:]
16 | kh, kw = self.weight.size()[-2:]
17 | sh, sw = self.stride
18 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
19 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
20 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
21 | if pad_h > 0 or pad_w > 0:
22 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
23 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
24 |
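A minimal usage sketch of `Conv2dSamePadding` (hypothetical snippet; assumes the repo root is on `PYTHONPATH` and the ops extensions are built):

```python
import torch

from models.ops import Conv2dSamePadding  # hypothetical usage; repo root on PYTHONPATH, ops built via make.sh

# TensorFlow-style "SAME" padding: output spatial size is ceil(input / stride).
conv = Conv2dSamePadding(16, 32, kernel_size=3, stride=2)
x = torch.randn(1, 16, 15, 15)
y = conv(x)  # 15x15 -> 8x8
```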
--------------------------------------------------------------------------------
/models/ops/conv2d_ws.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from torch.nn import functional as F
3 |
4 |
5 | class Conv2dWS(nn.Conv2d):
6 | def __init__(self, in_channels, out_channels, kernel_size, stride=1,
7 | padding=0, dilation=1, groups=1, bias=True):
8 | super(Conv2dWS, self).__init__(in_channels, out_channels, kernel_size, stride,
9 | padding, dilation, groups, bias)
10 |
11 | def forward(self, x):
12 | # return super(Conv2d, self).forward(x)
13 | weight = self.weight
14 | weight_mean = weight.mean(dim=1, keepdim=True).mean(dim=2, keepdim=True).mean(dim=3, keepdim=True)
15 | weight = weight - weight_mean
16 | std = weight.view(weight.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5
17 | weight = weight / std.expand_as(weight)
18 | return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
19 |
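A minimal usage sketch of `Conv2dWS` (hypothetical snippet; assumes the repo root is on `PYTHONPATH` and the ops extensions are built):

```python
import torch

from models.ops import Conv2dWS  # hypothetical usage; repo root on PYTHONPATH, ops built via make.sh

# Weight Standardization: the kernel is re-centered and re-scaled per output channel on every forward pass.
conv = Conv2dWS(16, 32, kernel_size=3, padding=1)
x = torch.randn(1, 16, 28, 28)
y = conv(x)  # padding=1 keeps the 28x28 spatial size
```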
--------------------------------------------------------------------------------
/models/ops/csrc/PoolPointsInterp.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #ifdef WITH_CUDA
4 | #include "cuda/vision.h"
5 | #endif
6 |
7 | // Interface for Python
8 | at::Tensor PoolPointsInterp_forward(const at::Tensor& input,
9 | const at::Tensor& rois,
10 | const float spatial_scale) {
11 | if (input.type().is_cuda()) {
12 | #ifdef WITH_CUDA
13 | return PoolPointsInterp_forward_cuda(input, rois, spatial_scale);
14 | #else
15 | AT_ERROR("Not compiled with GPU support");
16 | #endif
17 |   }
18 |   AT_ERROR("Not implemented on the CPU");
19 | }
20 | at::Tensor PoolPointsInterp_backward(const at::Tensor& grad,
21 | const at::Tensor& rois,
22 | const float spatial_scale,
23 | const int batch_size,
24 | const int channels,
25 | const int height,
26 | const int width) {
27 | if (grad.type().is_cuda()) {
28 | #ifdef WITH_CUDA
29 | return PoolPointsInterp_backward_cuda(grad, rois, spatial_scale, batch_size, channels, height, width);
30 | #else
31 | AT_ERROR("Not compiled with GPU support");
32 | #endif
33 | }
34 | AT_ERROR("Not implemented on the CPU");
35 | }
36 |
37 |
--------------------------------------------------------------------------------
/models/ops/csrc/ROIAlign.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 |
4 | #include "cpu/vision.h"
5 |
6 | #ifdef WITH_CUDA
7 | #include "cuda/vision.h"
8 | #endif
9 |
10 | // Interface for Python
11 | at::Tensor ROIAlign_forward(const at::Tensor& input,
12 | const at::Tensor& rois,
13 | const float spatial_scale,
14 | const int pooled_height,
15 | const int pooled_width,
16 | const int sampling_ratio,
17 | bool aligned) {
18 | if (input.type().is_cuda()) {
19 | #ifdef WITH_CUDA
20 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned);
21 | #else
22 | AT_ERROR("Not compiled with GPU support");
23 | #endif
24 | }
25 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned);
26 | }
27 |
28 | at::Tensor ROIAlign_backward(const at::Tensor& grad,
29 | const at::Tensor& rois,
30 | const float spatial_scale,
31 | const int pooled_height,
32 | const int pooled_width,
33 | const int batch_size,
34 | const int channels,
35 | const int height,
36 | const int width,
37 | const int sampling_ratio,
38 | bool aligned) {
39 | if (grad.type().is_cuda()) {
40 | #ifdef WITH_CUDA
41 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio, aligned);
42 | #else
43 | AT_ERROR("Not compiled with GPU support");
44 | #endif
45 | }
46 | AT_ERROR("Not implemented on the CPU");
47 | }
48 |
49 |
--------------------------------------------------------------------------------
/models/ops/csrc/ROIPool.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 |
4 | #include "cpu/vision.h"
5 |
6 | #ifdef WITH_CUDA
7 | #include "cuda/vision.h"
8 | #endif
9 |
10 |
11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input,
12 | const at::Tensor& rois,
13 | const float spatial_scale,
14 | const int pooled_height,
15 | const int pooled_width) {
16 | if (input.type().is_cuda()) {
17 | #ifdef WITH_CUDA
18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width);
19 | #else
20 | AT_ERROR("Not compiled with GPU support");
21 | #endif
22 | }
23 | AT_ERROR("Not implemented on the CPU");
24 | }
25 |
26 | at::Tensor ROIPool_backward(const at::Tensor& grad,
27 | const at::Tensor& input,
28 | const at::Tensor& rois,
29 | const at::Tensor& argmax,
30 | const float spatial_scale,
31 | const int pooled_height,
32 | const int pooled_width,
33 | const int batch_size,
34 | const int channels,
35 | const int height,
36 | const int width) {
37 | if (grad.type().is_cuda()) {
38 | #ifdef WITH_CUDA
39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width);
40 | #else
41 | AT_ERROR("Not compiled with GPU support");
42 | #endif
43 | }
44 | AT_ERROR("Not implemented on the CPU");
45 | }
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/models/ops/csrc/SigmoidFocalLoss.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "cpu/vision.h"
4 |
5 | #ifdef WITH_CUDA
6 | #include "cuda/vision.h"
7 | #endif
8 |
9 | // Interface for Python
10 | at::Tensor SigmoidFocalLoss_forward(
11 | const at::Tensor& logits,
12 | const at::Tensor& targets,
13 | const int num_classes,
14 | const float gamma,
15 | const float alpha) {
16 | if (logits.type().is_cuda()) {
17 | #ifdef WITH_CUDA
18 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha);
19 | #else
20 | AT_ERROR("Not compiled with GPU support");
21 | #endif
22 | }
23 | AT_ERROR("Not implemented on the CPU");
24 | }
25 |
26 | at::Tensor SigmoidFocalLoss_backward(
27 | const at::Tensor& logits,
28 | const at::Tensor& targets,
29 | const at::Tensor& d_losses,
30 | const int num_classes,
31 | const float gamma,
32 | const float alpha) {
33 | if (logits.type().is_cuda()) {
34 | #ifdef WITH_CUDA
35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha);
36 | #else
37 | AT_ERROR("Not compiled with GPU support");
38 | #endif
39 | }
40 | AT_ERROR("Not implemented on the CPU");
41 | }
42 |
--------------------------------------------------------------------------------
/models/ops/csrc/cpu/nms_cpu.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include "cpu/vision.h"
3 |
4 |
5 | template <typename scalar_t>
6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets,
7 | const at::Tensor& scores,
8 | const float threshold) {
9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");
12 |
13 | if (dets.numel() == 0) {
14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
15 | }
16 |
17 | auto x1_t = dets.select(1, 0).contiguous();
18 | auto y1_t = dets.select(1, 1).contiguous();
19 | auto x2_t = dets.select(1, 2).contiguous();
20 | auto y2_t = dets.select(1, 3).contiguous();
21 |
22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
23 |
24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
25 |
26 | auto ndets = dets.size(0);
27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
28 |
29 |   auto suppressed = suppressed_t.data<uint8_t>();
30 |   auto order = order_t.data<int64_t>();
31 |   auto x1 = x1_t.data<scalar_t>();
32 |   auto y1 = y1_t.data<scalar_t>();
33 |   auto x2 = x2_t.data<scalar_t>();
34 |   auto y2 = y2_t.data<scalar_t>();
35 |   auto areas = areas_t.data<scalar_t>();
36 |
37 | for (int64_t _i = 0; _i < ndets; _i++) {
38 | auto i = order[_i];
39 | if (suppressed[i] == 1)
40 | continue;
41 | auto ix1 = x1[i];
42 | auto iy1 = y1[i];
43 | auto ix2 = x2[i];
44 | auto iy2 = y2[i];
45 | auto iarea = areas[i];
46 |
47 | for (int64_t _j = _i + 1; _j < ndets; _j++) {
48 | auto j = order[_j];
49 | if (suppressed[j] == 1)
50 | continue;
51 | auto xx1 = std::max(ix1, x1[j]);
52 | auto yy1 = std::max(iy1, y1[j]);
53 | auto xx2 = std::min(ix2, x2[j]);
54 | auto yy2 = std::min(iy2, y2[j]);
55 |
56 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
57 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
58 | auto inter = w * h;
59 | auto ovr = inter / (iarea + areas[j] - inter);
60 | if (ovr >= threshold)
61 | suppressed[j] = 1;
62 | }
63 | }
64 | return at::nonzero(suppressed_t == 0).squeeze(1);
65 | }
66 |
67 | at::Tensor nms_cpu(const at::Tensor& dets,
68 | const at::Tensor& scores,
69 | const float threshold) {
70 | at::Tensor result;
71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
72 |     result = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
73 | });
74 | return result;
75 | }
76 |
--------------------------------------------------------------------------------
/models/ops/csrc/cpu/vision.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 | #include <torch/extension.h>
4 |
5 |
6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input,
7 | const at::Tensor& rois,
8 | const float spatial_scale,
9 | const int pooled_height,
10 | const int pooled_width,
11 | const int sampling_ratio,
12 | bool aligned);
13 |
14 |
15 | at::Tensor nms_cpu(const at::Tensor& dets,
16 | const at::Tensor& scores,
17 | const float threshold);
18 |
--------------------------------------------------------------------------------
/models/ops/csrc/cuda/deform_pool_cuda.cu:
--------------------------------------------------------------------------------
1 | // modify from
2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
3 |
4 | // based on
5 | // author: Charles Shang
6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
7 |
8 | #include <ATen/ATen.h>
9 | #include <ATen/cuda/CUDAContext.h>
10 |
11 | #include <THC/THC.h>
12 | #include <THC/THCDeviceUtils.cuh>
13 |
14 | #include <vector>
15 | #include <iostream>
16 | #include <cmath>
17 |
18 |
19 | void DeformablePSROIPoolForward(
20 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans,
21 | at::Tensor out, at::Tensor top_count, const int batch, const int channels,
22 | const int height, const int width, const int num_bbox,
23 | const int channels_trans, const int no_trans, const float spatial_scale,
24 | const int output_dim, const int group_size, const int pooled_size,
25 | const int part_size, const int sample_per_part, const float trans_std);
26 |
27 | void DeformablePSROIPoolBackwardAcc(
28 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox,
29 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad,
30 | at::Tensor trans_grad, const int batch, const int channels,
31 | const int height, const int width, const int num_bbox,
32 | const int channels_trans, const int no_trans, const float spatial_scale,
33 | const int output_dim, const int group_size, const int pooled_size,
34 | const int part_size, const int sample_per_part, const float trans_std);
35 |
36 | void deform_psroi_pooling_cuda_forward(
37 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
38 | at::Tensor top_count, const int no_trans, const float spatial_scale,
39 | const int output_dim, const int group_size, const int pooled_size,
40 | const int part_size, const int sample_per_part, const float trans_std)
41 | {
42 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
43 |
44 | const int batch = input.size(0);
45 | const int channels = input.size(1);
46 | const int height = input.size(2);
47 | const int width = input.size(3);
48 | const int channels_trans = no_trans ? 2 : trans.size(1);
49 |
50 | const int num_bbox = bbox.size(0);
51 | if (num_bbox != out.size(0))
52 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).",
53 | out.size(0), num_bbox);
54 |
55 | DeformablePSROIPoolForward(
56 | input, bbox, trans, out, top_count, batch, channels, height, width,
57 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size,
58 | pooled_size, part_size, sample_per_part, trans_std);
59 | }
60 |
61 | void deform_psroi_pooling_cuda_backward(
62 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
63 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
64 | const int no_trans, const float spatial_scale, const int output_dim,
65 | const int group_size, const int pooled_size, const int part_size,
66 | const int sample_per_part, const float trans_std)
67 | {
68 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
69 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
70 |
71 | const int batch = input.size(0);
72 | const int channels = input.size(1);
73 | const int height = input.size(2);
74 | const int width = input.size(3);
75 | const int channels_trans = no_trans ? 2 : trans.size(1);
76 |
77 | const int num_bbox = bbox.size(0);
78 | if (num_bbox != out_grad.size(0))
79 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).",
80 | out_grad.size(0), num_bbox);
81 |
82 | DeformablePSROIPoolBackwardAcc(
83 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch,
84 | channels, height, width, num_bbox, channels_trans, no_trans,
85 | spatial_scale, output_dim, group_size, pooled_size, part_size,
86 | sample_per_part, trans_std);
87 | }
88 |
--------------------------------------------------------------------------------
/models/ops/csrc/deform_pool.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 | #include "cpu/vision.h"
4 |
5 | #ifdef WITH_CUDA
6 | #include "cuda/vision.h"
7 | #endif
8 |
9 |
10 | // Interface for Python
11 | void deform_psroi_pooling_forward(
12 | at::Tensor input,
13 | at::Tensor bbox,
14 | at::Tensor trans,
15 | at::Tensor out,
16 | at::Tensor top_count,
17 | const int no_trans,
18 | const float spatial_scale,
19 | const int output_dim,
20 | const int group_size,
21 | const int pooled_size,
22 | const int part_size,
23 | const int sample_per_part,
24 | const float trans_std)
25 | {
26 | if (input.type().is_cuda()) {
27 | #ifdef WITH_CUDA
28 | return deform_psroi_pooling_cuda_forward(
29 | input, bbox, trans, out, top_count,
30 | no_trans, spatial_scale, output_dim, group_size,
31 | pooled_size, part_size, sample_per_part, trans_std
32 | );
33 | #else
34 | AT_ERROR("Not compiled with GPU support");
35 | #endif
36 | }
37 | AT_ERROR("Not implemented on the CPU");
38 | }
39 |
40 |
41 | void deform_psroi_pooling_backward(
42 | at::Tensor out_grad,
43 | at::Tensor input,
44 | at::Tensor bbox,
45 | at::Tensor trans,
46 | at::Tensor top_count,
47 | at::Tensor input_grad,
48 | at::Tensor trans_grad,
49 | const int no_trans,
50 | const float spatial_scale,
51 | const int output_dim,
52 | const int group_size,
53 | const int pooled_size,
54 | const int part_size,
55 | const int sample_per_part,
56 | const float trans_std)
57 | {
58 | if (input.type().is_cuda()) {
59 | #ifdef WITH_CUDA
60 | return deform_psroi_pooling_cuda_backward(
61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad,
62 | no_trans, spatial_scale, output_dim, group_size, pooled_size,
63 | part_size, sample_per_part, trans_std
64 | );
65 | #else
66 | AT_ERROR("Not compiled with GPU support");
67 | #endif
68 | }
69 | AT_ERROR("Not implemented on the CPU");
70 | }
71 |
--------------------------------------------------------------------------------
/models/ops/csrc/ml_nms.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 | #include "cpu/vision.h"
4 |
5 | #ifdef WITH_CUDA
6 | #include "cuda/vision.h"
7 | #endif
8 |
9 |
10 | at::Tensor ml_nms(const at::Tensor& dets,
11 | const at::Tensor& scores,
12 | const at::Tensor& labels,
13 | const float threshold) {
14 |
15 | if (dets.type().is_cuda()) {
16 | #ifdef WITH_CUDA
17 | // TODO raise error if not compiled with CUDA
18 | if (dets.numel() == 0)
19 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
20 | auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1);
21 | return ml_nms_cuda(b, threshold);
22 | #else
23 | AT_ERROR("Not compiled with GPU support");
24 | #endif
25 | }
26 | AT_ERROR("CPU version not implemented");
27 | }
28 |
--------------------------------------------------------------------------------
/models/ops/csrc/nms.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 | #include "cpu/vision.h"
4 |
5 | #ifdef WITH_CUDA
6 | #include "cuda/vision.h"
7 | #endif
8 |
9 |
10 | at::Tensor nms(const at::Tensor& dets,
11 | const at::Tensor& scores,
12 | const float threshold) {
13 |
14 | if (dets.type().is_cuda()) {
15 | #ifdef WITH_CUDA
16 | // TODO raise error if not compiled with CUDA
17 | if (dets.numel() == 0)
18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
20 | return nms_cuda(b, threshold);
21 | #else
22 | AT_ERROR("Not compiled with GPU support");
23 | #endif
24 | }
25 |
26 | at::Tensor result = nms_cpu(dets, scores, threshold);
27 | return result;
28 | }
29 |
--------------------------------------------------------------------------------
/models/ops/csrc/vision.cpp:
--------------------------------------------------------------------------------
1 | #include "nms.h"
2 | #include "ml_nms.h"
3 | #include "ROIAlign.h"
4 | #include "ROIPool.h"
5 | #include "SigmoidFocalLoss.h"
6 | #include "deform_conv.h"
7 | #include "deform_pool.h"
8 | #include "PoolPointsInterp.h"
9 |
10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
11 | m.def("nms", &nms, "non-maximum suppression");
12 | m.def("ml_nms", &ml_nms, "multi-label non-maximum suppression");
13 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
14 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
15 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward");
16 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward");
17 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward");
18 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward");
19 | m.def("pool_points_interp_forward", &PoolPointsInterp_forward, "PoolPointsInterp_forward");
20 | m.def("pool_points_interp_backward", &PoolPointsInterp_backward, "PoolPointsInterp_backward");
21 | // dcn-v2
22 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward");
23 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input");
24 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters");
25 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward");
26 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward");
27 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward");
28 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward");
29 | }
30 |
--------------------------------------------------------------------------------
/models/ops/cython_bbox.pyx:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2017-present, Facebook, Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 | #
16 | # Based on:
17 | # --------------------------------------------------------
18 | # Fast R-CNN
19 | # Copyright (c) 2015 Microsoft
20 | # Licensed under The MIT License [see LICENSE for details]
21 | # Written by Sergey Karayev
22 | # --------------------------------------------------------
23 |
24 | cimport cython
25 | import numpy as np
26 | cimport numpy as np
27 |
28 | DTYPE = np.float32
29 | ctypedef np.float32_t DTYPE_t
30 |
31 | @cython.boundscheck(False)
32 | def bbox_overlaps(
33 | np.ndarray[DTYPE_t, ndim=2] boxes,
34 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
35 | """
36 | Parameters
37 | ----------
38 | boxes: (N, 4) ndarray of float
39 | query_boxes: (K, 4) ndarray of float
40 | Returns
41 | -------
42 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes
43 | """
44 | cdef unsigned int N = boxes.shape[0]
45 | cdef unsigned int K = query_boxes.shape[0]
46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
47 | cdef DTYPE_t iw, ih, box_area
48 | cdef DTYPE_t ua
49 | cdef unsigned int k, n
50 | with nogil:
51 | for k in range(K):
52 | box_area = (
53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
55 | )
56 | for n in range(N):
57 | iw = (
58 | min(boxes[n, 2], query_boxes[k, 2]) -
59 | max(boxes[n, 0], query_boxes[k, 0]) + 1
60 | )
61 | if iw > 0:
62 | ih = (
63 | min(boxes[n, 3], query_boxes[k, 3]) -
64 | max(boxes[n, 1], query_boxes[k, 1]) + 1
65 | )
66 | if ih > 0:
67 | ua = float(
68 | (boxes[n, 2] - boxes[n, 0] + 1) *
69 | (boxes[n, 3] - boxes[n, 1] + 1) +
70 | box_area - iw * ih
71 | )
72 | overlaps[n, k] = iw * ih / ua
73 | return overlaps
74 |
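For reference, a minimal NumPy sketch of the same pairwise-overlap computation, including the `+ 1` pixel convention used above (not part of the repository source; the compiled Cython version remains the one actually used):

    import numpy as np

    def bbox_overlaps_numpy(boxes, query_boxes):
        # boxes: (N, 4), query_boxes: (K, 4); both [x1, y1, x2, y2], float32
        x1 = np.maximum(boxes[:, None, 0], query_boxes[None, :, 0])
        y1 = np.maximum(boxes[:, None, 1], query_boxes[None, :, 1])
        x2 = np.minimum(boxes[:, None, 2], query_boxes[None, :, 2])
        y2 = np.minimum(boxes[:, None, 3], query_boxes[None, :, 3])
        iw = np.clip(x2 - x1 + 1, 0, None)   # intersection width (+1 convention)
        ih = np.clip(y2 - y1 + 1, 0, None)   # intersection height
        area = ((boxes[:, 2] - boxes[:, 0] + 1) *
                (boxes[:, 3] - boxes[:, 1] + 1))[:, None]
        query_area = ((query_boxes[:, 2] - query_boxes[:, 0] + 1) *
                      (query_boxes[:, 3] - query_boxes[:, 1] + 1))[None, :]
        union = area + query_area - iw * ih
        return iw * ih / union

    boxes = np.array([[0, 0, 9, 9]], dtype=np.float32)
    queries = np.array([[0, 0, 9, 9], [5, 5, 14, 14]], dtype=np.float32)
    print(bbox_overlaps_numpy(boxes, queries))   # ~[[1.0, 0.143]]
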
--------------------------------------------------------------------------------
/models/ops/dcn/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn)
3 | #
--------------------------------------------------------------------------------
/models/ops/dcn/deform_pool_func.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from torch.autograd.function import once_differentiable
4 |
5 | from models.ops import _C
6 |
7 |
8 | class DeformRoIPoolingFunction(Function):
9 |
10 | @staticmethod
11 | def forward(
12 | ctx,
13 | data,
14 | rois,
15 | offset,
16 | spatial_scale,
17 | out_size,
18 | out_channels,
19 | no_trans,
20 | group_size=1,
21 | part_size=None,
22 | sample_per_part=4,
23 | trans_std=.0
24 | ):
25 | ctx.spatial_scale = spatial_scale
26 | ctx.out_size = out_size
27 | ctx.out_channels = out_channels
28 | ctx.no_trans = no_trans
29 | ctx.group_size = group_size
30 | ctx.part_size = out_size if part_size is None else part_size
31 | ctx.sample_per_part = sample_per_part
32 | ctx.trans_std = trans_std
33 |
34 | assert 0.0 <= ctx.trans_std <= 1.0
35 | if not data.is_cuda:
36 | raise NotImplementedError
37 |
38 | n = rois.shape[0]
39 | output = data.new_empty(n, out_channels, out_size, out_size)
40 | output_count = data.new_empty(n, out_channels, out_size, out_size)
41 | _C.deform_psroi_pooling_forward(
42 | data,
43 | rois,
44 | offset,
45 | output,
46 | output_count,
47 | ctx.no_trans,
48 | ctx.spatial_scale,
49 | ctx.out_channels,
50 | ctx.group_size,
51 | ctx.out_size,
52 | ctx.part_size,
53 | ctx.sample_per_part,
54 | ctx.trans_std
55 | )
56 |
57 | if data.requires_grad or rois.requires_grad or offset.requires_grad:
58 | ctx.save_for_backward(data, rois, offset)
59 | ctx.output_count = output_count
60 |
61 | return output
62 |
63 | @staticmethod
64 | @once_differentiable
65 | def backward(ctx, grad_output):
66 | if not grad_output.is_cuda:
67 | raise NotImplementedError
68 |
69 | data, rois, offset = ctx.saved_tensors
70 | output_count = ctx.output_count
71 | grad_input = torch.zeros_like(data)
72 | grad_rois = None
73 | grad_offset = torch.zeros_like(offset)
74 |
75 | _C.deform_psroi_pooling_backward(
76 | grad_output,
77 | data,
78 | rois,
79 | offset,
80 | output_count,
81 | grad_input,
82 | grad_offset,
83 | ctx.no_trans,
84 | ctx.spatial_scale,
85 | ctx.out_channels,
86 | ctx.group_size,
87 | ctx.out_size,
88 | ctx.part_size,
89 | ctx.sample_per_part,
90 | ctx.trans_std
91 | )
92 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None)
93 |
94 |
95 | deform_roi_pooling = DeformRoIPoolingFunction.apply
96 |
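A usage sketch of `deform_roi_pooling` (not part of the repository source), assuming the `_C` extension has been built (e.g. via `setup_rcnn.py` / `make.sh`) and a CUDA device is available; as the forward above shows, CPU tensors raise NotImplementedError. The scale and size values below are illustrative only:

    import torch
    from models.ops.dcn.deform_pool_func import deform_roi_pooling

    if torch.cuda.is_available():
        feat = torch.randn(1, 16, 32, 32, device='cuda', requires_grad=True)
        # rois are (batch_idx, x1, y1, x2, y2) in input-image coordinates
        rois = torch.tensor([[0., 8., 8., 120., 120.]], device='cuda')
        offset = feat.new_empty(0)   # with no_trans=True the offsets are unused
        out = deform_roi_pooling(
            feat, rois, offset,
            0.25,   # spatial_scale (illustrative: feature stride 4)
            7,      # out_size
            16,     # out_channels (kept equal to the input channels here)
            True,   # no_trans
        )
        print(out.shape)   # torch.Size([1, 16, 7, 7])
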
--------------------------------------------------------------------------------
/models/ops/dropblock.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | from torch import nn
4 |
5 |
6 | class DropBlock2D(nn.Module):
7 | r"""Randomly zeroes spatial blocks of the input tensor.
8 | As described in the paper
9 | `DropBlock: A regularization method for convolutional networks`_ ,
10 | dropping whole contiguous blocks of the feature map removes correlated
11 | semantic information more effectively than regular dropout.
12 | Args:
13 | keep_prob (float, optional): probability of an element being kept.
14 | The authors recommend linearly decreasing this value from 1 to the
15 | desired value.
16 | block_size (int, optional): size of the block. Block size in paper
17 | usually equals last feature map dimensions.
18 | Shape:
19 | - Input: :math:`(N, C, H, W)`
20 | - Output: :math:`(N, C, H, W)` (same shape as input)
21 | .. _DropBlock: A regularization method for convolutional networks:
22 | https://arxiv.org/abs/1810.12890
23 | """
24 |
25 | def __init__(self, keep_prob=0.9, block_size=7):
26 | super(DropBlock2D, self).__init__()
27 | self.keep_prob = keep_prob
28 | self.block_size = block_size
29 |
30 | def forward(self, input):
31 | if not self.training or self.keep_prob == 1:
32 | return input
33 | gamma = (1. - self.keep_prob) / self.block_size ** 2
34 | for sh in input.shape[2:]:
35 | gamma *= sh / (sh - self.block_size + 1)
36 | M = torch.bernoulli(torch.ones_like(input) * gamma)
37 | Msum = F.conv2d(M,
38 | torch.ones((input.shape[1], 1, self.block_size, self.block_size)).to(device=input.device,
39 | dtype=input.dtype),
40 | padding=self.block_size // 2,
41 | groups=input.shape[1])
42 | torch.set_printoptions(threshold=5000)
43 | mask = (Msum < 1).to(device=input.device, dtype=input.dtype)
44 | return input * mask * mask.numel() / mask.sum()  # TODO: input * mask * self.keep_prob ?
45 |
46 |
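A quick, self-contained sanity check of DropBlock2D (a sketch, not part of the original file; assumes the `models` package is importable): in eval mode, or with keep_prob=1, the input passes through unchanged, while in train mode contiguous blocks of activations are zeroed and the remainder is rescaled:

    import torch
    from models.ops.dropblock import DropBlock2D

    drop = DropBlock2D(keep_prob=0.9, block_size=3)
    x = torch.randn(2, 8, 16, 16)

    drop.eval()
    assert torch.equal(drop(x), x)       # identity at inference time

    drop.train()
    y = drop(x)
    zeroed = (y == 0).float().mean().item()
    print('fraction zeroed ~', zeroed)   # roughly 1 - keep_prob on average (stochastic)
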
--------------------------------------------------------------------------------
/models/ops/iou_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 |
5 | class IOULoss(nn.Module):
6 | def __init__(self, loc_loss_type):
7 | super(IOULoss, self).__init__()
8 | self.loc_loss_type = loc_loss_type
9 |
10 | def forward(self, pred, target, weight=None):
11 | pred_left = pred[:, 0]
12 | pred_top = pred[:, 1]
13 | pred_right = pred[:, 2]
14 | pred_bottom = pred[:, 3]
15 |
16 | target_left = target[:, 0]
17 | target_top = target[:, 1]
18 | target_right = target[:, 2]
19 | target_bottom = target[:, 3]
20 |
21 | target_area = (target_left + target_right) * (target_top + target_bottom)
22 | pred_area = (pred_left + pred_right) * (pred_top + pred_bottom)
23 |
24 | w_intersect = torch.min(pred_left, target_left) + torch.min(pred_right, target_right)
25 | h_intersect = torch.min(pred_bottom, target_bottom) + torch.min(pred_top, target_top)
26 | g_w_intersect = torch.max(pred_left, target_left) + torch.max(pred_right, target_right)
27 | g_h_intersect = torch.max(pred_bottom, target_bottom) + torch.max(pred_top, target_top)
28 |
29 | area_intersect = w_intersect * h_intersect
30 | area_union = target_area + pred_area - area_intersect
31 | ac_union = g_w_intersect * g_h_intersect + 1e-7
32 |
33 | ious = (area_intersect + 1.0) / (area_union + 1.0)
34 | gious = ious - (ac_union - area_union) / ac_union
35 |
36 | if self.loc_loss_type == 'iou':
37 | losses = -torch.log(ious)
38 | elif self.loc_loss_type == 'liou':
39 | losses = 1 - ious
40 | elif self.loc_loss_type == 'giou':
41 | losses = 1 - gious
42 | else:
43 | raise NotImplementedError
44 |
45 | if weight is not None and weight.sum() > 0:
46 | return (losses * weight).sum()
47 | else:
48 | assert losses.numel() != 0
49 | return losses.sum()
50 |
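Note that IOULoss expects boxes in the FCOS-style (left, top, right, bottom) distance encoding relative to a location, not (x1, y1, x2, y2) corners. A small sketch (not part of the original file; assumes the `models` package is importable):

    import torch
    from models.ops.iou_loss import IOULoss

    # two locations, distances to the four box sides: (l, t, r, b)
    pred   = torch.tensor([[2., 2., 2., 2.],
                           [1., 1., 3., 3.]])
    target = torch.tensor([[2., 2., 2., 2.],
                           [2., 2., 2., 2.]])

    print(IOULoss('iou')(pred, target))    # sum of -log(IoU); the exact match contributes 0
    print(IOULoss('giou')(pred, target))   # sum of (1 - GIoU) over the two locations
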
--------------------------------------------------------------------------------
/models/ops/l2_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def l2_loss(input, target):
5 | """
6 | L2 loss over positive targets only: the squared error at positions with
7 | target > 0, averaged by the number of positives (zero if there are none).
8 | """
9 | pos_inds = torch.nonzero(target > 0.0).squeeze(1)
10 | if pos_inds.shape[0] > 0:
11 | cond = torch.abs(input[pos_inds] - target[pos_inds])
12 | loss = 0.5 * cond ** 2 / pos_inds.shape[0]
13 | else:
14 | loss = input * 0.0
15 | return loss.sum()
16 |
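A tiny numerical check (a sketch, not part of the original file): only positions with target > 0 contribute, and the squared error is divided by the number of positives:

    import torch
    from models.ops.l2_loss import l2_loss

    target = torch.tensor([0.0, 1.0, 2.0])   # index 0 is ignored (not positive)
    input_ = torch.tensor([0.5, 1.5, 1.5])
    # positives are indices 1 and 2 -> 0.5 * (0.5**2 + 0.5**2) / 2 = 0.125
    print(l2_loss(input_, target))           # tensor(0.1250)
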
--------------------------------------------------------------------------------
/models/ops/l2norm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.init as init
4 |
5 |
6 | class L2Norm(nn.Module):
7 | def __init__(self, n_channels, scale):
8 | super(L2Norm, self).__init__()
9 | self.n_channels = n_channels
10 | self.gamma = scale or None
11 | self.eps = 1e-10
12 | self.weight = nn.Parameter(torch.Tensor(self.n_channels))
13 | self.reset_parameters()
14 |
15 | def reset_parameters(self):
16 | init.constant_(self.weight, self.gamma)
17 |
18 | def forward(self, x):
19 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps
20 | x = x / norm
21 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
22 | return out
23 |
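A short usage sketch (not part of the original file): L2Norm normalizes each spatial position across channels and rescales by a learnable per-channel weight initialized to `scale`, as used on the conv4_3 feature map in SSD:

    import torch
    from models.ops.l2norm import L2Norm

    layer = L2Norm(n_channels=4, scale=20.0)
    x = torch.randn(2, 4, 8, 8)
    y = layer(x)
    # each position is unit-normalized over channels, then scaled by ~20
    print(y.pow(2).sum(dim=1).sqrt().mean())   # ~= 20.0
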
--------------------------------------------------------------------------------
/models/ops/label_smoothing.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class LabelSmoothing(nn.Module):
6 | """
7 | NLL loss with label smoothing.
8 | """
9 |
10 | def __init__(self, smoothing=0.0):
11 | """
12 | Constructor for the LabelSmoothing module.
13 | :param smoothing: label smoothing factor
14 | """
15 | super(LabelSmoothing, self).__init__()
16 | self.confidence = 1.0 - smoothing
17 | self.smoothing = smoothing
18 |
19 | def forward(self, x, target):
20 | logprobs = torch.nn.functional.log_softmax(x, dim=-1)
21 |
22 | nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1))
23 | nll_loss = nll_loss.squeeze(1)
24 | smooth_loss = -logprobs.mean(dim=-1)
25 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss
26 | return loss.mean()
27 |
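A short sketch (not part of the original file) showing that the module reduces to ordinary cross-entropy when smoothing is 0:

    import torch
    from models.ops.label_smoothing import LabelSmoothing

    logits = torch.randn(4, 10)          # 4 samples, 10 classes
    labels = torch.tensor([1, 3, 5, 7])

    criterion = LabelSmoothing(smoothing=0.1)
    plain_ce = torch.nn.functional.cross_entropy(logits, labels)
    print(criterion(logits, labels), plain_ce)
    # with smoothing = 0.0 the two losses coincide
    assert torch.allclose(LabelSmoothing(0.0)(logits, labels), plain_ce)
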
--------------------------------------------------------------------------------
/models/ops/lovasz_hinge_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from torch.autograd import Variable
5 | import torch.nn.functional as F
6 |
7 |
8 | def lovasz_grad(gt_sorted):
9 | """
10 | Computes gradient of the Lovasz extension w.r.t sorted errors
11 | See Alg. 1 in paper
12 | """
13 | p = len(gt_sorted)
14 | gts = gt_sorted.sum()
15 | intersection = gts - gt_sorted.float().cumsum(0)
16 | union = gts + (1 - gt_sorted).float().cumsum(0)
17 | jaccard = 1. - intersection / union
18 | if p > 1: # cover 1-pixel case
19 | jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]
20 | return jaccard
21 |
22 |
23 | class LovaszHinge(nn.Module):
24 | def __init__(self, reduction='mean'):
25 | super(LovaszHinge, self).__init__()
26 | self.reduction = reduction
27 |
28 | def flatten(self, input, target, mask=None):
29 | if mask is None:
30 | input_flatten = input.view(-1)
31 | target_flatten = target.view(-1)
32 | else:
33 | input_flatten = input[mask].view(-1)
34 | target_flatten = target[mask].view(-1)
35 | return input_flatten, target_flatten
36 |
37 | def lovasz_hinge_flat(self, logits, labels):
38 | """
39 | Binary Lovasz hinge loss
40 | logits: [P] Variable, logits at each prediction (between -\infty and +\infty)
41 | labels: [P] Tensor, binary ground truth labels (0 or 1)
42 | ignore: label to ignore
43 | """
44 | if len(labels) == 0:
45 | # only void pixels, the gradients should be 0
46 | return logits.sum() * 0.
47 | signs = 2. * labels.float() - 1.
48 | errors = (1. - logits * Variable(signs))
49 | errors_sorted, perm = torch.sort(errors, dim=0, descending=True)
50 | perm = perm.data
51 | gt_sorted = labels[perm]
52 | grad = lovasz_grad(gt_sorted)
53 | loss = torch.dot(F.relu(errors_sorted), Variable(grad))
54 | return loss
55 |
56 | def forward(self, inputs, targets, mask=None, act=False):
57 | losses = []
58 | for id in range(len(inputs)):
59 | if mask is not None:
60 | input_flatten, target_flatten = self.flatten(inputs[id], targets[id], mask[id])
61 | else:
62 | input_flatten, target_flatten = self.flatten(inputs[id], targets[id])
63 | if act:
64 | # map [0, 1] to [-inf, inf]
65 | input_flatten = torch.log(input_flatten) - torch.log(1 - input_flatten)
66 | losses.append(self.lovasz_hinge_flat(input_flatten, target_flatten))
67 | losses = torch.stack(losses).to(device=inputs.device)
68 | if self.reduction == "mean":
69 | losses = losses.mean()
70 | elif self.reduction == "sum":
71 | losses = losses.sum()
72 |
73 | return losses
74 |
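A toy-batch sketch (not part of the original file): `inputs` are raw per-pixel logits and `targets` are binary masks; per the forward above, both are iterated over the first dimension and flattened per sample:

    import torch
    from models.ops.lovasz_hinge_loss import LovaszHinge

    criterion = LovaszHinge(reduction='mean')
    logits = torch.randn(2, 1, 8, 8)                   # 2 samples of raw logits
    targets = (torch.rand(2, 1, 8, 8) > 0.5).float()   # binary ground-truth masks
    print(criterion(logits, targets))                  # scalar Lovasz hinge loss
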
--------------------------------------------------------------------------------
/models/ops/misc.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | from torch import nn
4 | from torch.nn.modules.utils import _ntuple
5 |
6 |
7 | class _NewEmptyTensorOp(torch.autograd.Function):
8 | @staticmethod
9 | def forward(ctx, x, new_shape):
10 | ctx.shape = x.shape
11 | return x.new_empty(new_shape)
12 |
13 | @staticmethod
14 | def backward(ctx, grad):
15 | shape = ctx.shape
16 | return _NewEmptyTensorOp.apply(grad, shape), None
17 |
18 |
19 | class Conv2d(torch.nn.Conv2d):
20 | def forward(self, x):
21 | if x.numel() > 0:
22 | return super(Conv2d, self).forward(x)
23 | # get output shape
24 |
25 | output_shape = [
26 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1
27 | for i, p, di, k, d in zip(
28 | x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride
29 | )
30 | ]
31 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape
32 | return _NewEmptyTensorOp.apply(x, output_shape)
33 |
34 |
35 | class ConvTranspose2d(torch.nn.ConvTranspose2d):
36 | def forward(self, x):
37 | if x.numel() > 0:
38 | return super(ConvTranspose2d, self).forward(x)
39 | # get output shape
40 |
41 | output_shape = [
42 | (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op
43 | for i, p, di, k, d, op in zip(
44 | x.shape[-2:],
45 | self.padding,
46 | self.dilation,
47 | self.kernel_size,
48 | self.stride,
49 | self.output_padding,
50 | )
51 | ]
52 | output_shape = [x.shape[0], self.bias.shape[0]] + output_shape
53 | return _NewEmptyTensorOp.apply(x, output_shape)
54 |
55 |
56 | class BatchNorm2d(torch.nn.BatchNorm2d):
57 | def forward(self, x):
58 | if x.numel() > 0:
59 | return super(BatchNorm2d, self).forward(x)
60 | # get output shape
61 | output_shape = x.shape
62 | return _NewEmptyTensorOp.apply(x, output_shape)
63 |
64 |
65 | def interpolate(x, size=None, scale_factor=None, mode="nearest", align_corners=None):
66 | if x.numel() > 0:
67 | return torch.nn.functional.interpolate(
68 | x, size, scale_factor, mode, align_corners
69 | )
70 |
71 | def _check_size_scale_factor(dim):
72 | if size is None and scale_factor is None:
73 | raise ValueError("either size or scale_factor should be defined")
74 | if size is not None and scale_factor is not None:
75 | raise ValueError("only one of size or scale_factor should be defined")
76 | if (
77 | scale_factor is not None
78 | and isinstance(scale_factor, tuple)
79 | and len(scale_factor) != dim
80 | ):
81 | raise ValueError(
82 | "scale_factor shape must match input shape. "
83 | "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor))
84 | )
85 |
86 | def _output_size(dim):
87 | _check_size_scale_factor(dim)
88 | if size is not None:
89 | return size
90 | scale_factors = _ntuple(dim)(scale_factor)
91 | # math.floor might return float in py2.7
92 | return [
93 | int(math.floor(x.size(i + 2) * scale_factors[i])) for i in range(dim)
94 | ]
95 |
96 | output_shape = tuple(_output_size(2))
97 | output_shape = x.shape[:-2] + output_shape
98 | return _NewEmptyTensorOp.apply(x, output_shape)
99 |
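These wrappers exist so that empty batches (e.g. images with zero proposals) propagate through conv / deconv / interpolate with correctly-shaped empty outputs instead of crashing. A quick check (a sketch, not part of the original file):

    import torch
    from models.ops.misc import Conv2d, interpolate

    conv = Conv2d(3, 8, kernel_size=3, padding=1)
    empty = torch.zeros(0, 3, 32, 32)                 # a batch with zero elements
    print(conv(empty).shape)                          # torch.Size([0, 8, 32, 32])

    up = interpolate(empty, scale_factor=2, mode='nearest')
    print(up.shape)                                   # torch.Size([0, 3, 64, 64])
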
--------------------------------------------------------------------------------
/models/ops/mixture_batchnorm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class H_Sigmoid(nn.Module):
7 | def forward(self, x):
8 | out = F.relu6(x + 3, inplace=True) / 6
9 | return out
10 |
11 |
12 | def make_norm(c, norm='bn', group=1, eps=1e-5):
13 | if norm == 'bn':
14 | return nn.BatchNorm2d(c, eps=eps)
15 | elif norm == 'gn':
16 | assert c % group == 0
17 | return nn.GroupNorm(group, c, eps=eps)
18 | elif norm == 'none':
19 | return None
20 | else:
21 | return nn.BatchNorm2d(c, eps=eps)
22 |
23 |
24 | class AttentionWeights(nn.Module):
25 | expansion = 2
26 |
27 | def __init__(self, num_channels, k, norm=None, groups=1, use_hsig=True):
28 | super(AttentionWeights, self).__init__()
29 | # num_channels *= 2
30 | self.k = k
31 | self.avgpool = nn.AdaptiveAvgPool2d(1)
32 | self.attention = nn.Sequential(
33 | nn.Conv2d(num_channels, k, 1, bias=False),
34 | make_norm(k, norm, groups),
35 | H_Sigmoid() if use_hsig else nn.Sigmoid()
36 | )
37 |
38 | def forward(self, x):
39 | b, c, _, _ = x.size()
40 | y = self.avgpool(x) # .view(b, c)
41 | var = torch.var(x, dim=(2, 3)).view(b, c, 1, 1)
42 | y *= (var + 1e-3).rsqrt()
43 | # y = torch.cat((y, var), dim=1)
44 | return self.attention(y).view(b, self.k)
45 |
46 |
47 | # TODO: keep it to use FP32 always, need to figure out how to set it using apex ?
48 | class MixtureBatchNorm2d(nn.BatchNorm2d):
49 | def __init__(self, num_channels, k, eps=1e-5, momentum=0.1, track_running_stats=True):
50 | super(MixtureBatchNorm2d, self).__init__(
51 | num_channels, eps=eps, momentum=momentum, affine=False, track_running_stats=track_running_stats)
52 | self.k = k
53 | self.weight_ = nn.Parameter(torch.Tensor(k, num_channels))
54 | self.bias_ = nn.Parameter(torch.Tensor(k, num_channels))
55 |
56 | self.attention_weights = AttentionWeights(num_channels, k, norm='bn')
57 |
58 | self._init_params()
59 |
60 | def _init_params(self):
61 | nn.init.normal_(self.weight_, 1, 0.1)
62 | nn.init.normal_(self.bias_, 0, 0.1)
63 |
64 | def forward(self, x):
65 | output = super(MixtureBatchNorm2d, self).forward(x)
66 | size = output.size()
67 | y = self.attention_weights(x) # bxk # or use output as attention input
68 |
69 | weight = y @ self.weight_ # bxc
70 | bias = y @ self.bias_ # bxc
71 | weight = weight.unsqueeze(-1).unsqueeze(-1).expand(size)
72 | bias = bias.unsqueeze(-1).unsqueeze(-1).expand(size)
73 |
74 | return weight * output + bias
75 |
76 |
77 | # Modified on top of nn.GroupNorm
78 | # TODO: keep it to use FP32 always, need to figure out how to set it using apex ?
79 | class MixtureGroupNorm(nn.Module):
80 | __constants__ = ['num_groups', 'num_channels', 'k', 'eps', 'weight', 'bias']
81 |
82 | def __init__(self, num_channels, num_groups, k, eps=1e-5):
83 | super(MixtureGroupNorm, self).__init__()
84 | self.num_groups = num_groups
85 | self.num_channels = num_channels
86 | self.k = k
87 | self.eps = eps
88 | self.affine = True
89 | self.weight_ = nn.Parameter(torch.Tensor(k, num_channels))
90 | self.bias_ = nn.Parameter(torch.Tensor(k, num_channels))
91 | self.register_parameter('weight', None)
92 | self.register_parameter('bias', None)
93 |
94 | self.attention_weights = AttentionWeights(num_channels, k, norm='gn', groups=1)
95 |
96 | self.reset_parameters()
97 |
98 | def reset_parameters(self):
99 | nn.init.normal_(self.weight_, 1, 0.1)
100 | nn.init.normal_(self.bias_, 0, 0.1)
101 |
102 | def forward(self, x):
103 | output = F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps)
104 | size = output.size()
105 |
106 | y = self.attention_weights(x) # TODO: use output as attention input
107 |
108 | weight = y @ self.weight_
109 | bias = y @ self.bias_
110 |
111 | weight = weight.unsqueeze(-1).unsqueeze(-1).expand(size)
112 | bias = bias.unsqueeze(-1).unsqueeze(-1).expand(size)
113 |
114 | return weight * output + bias
115 |
116 | def extra_repr(self):
117 | return '{num_groups}, {num_channels}, eps={eps}, ' \
118 | 'affine={affine}'.format(**self.__dict__)
119 |
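A minimal forward-pass sketch of MixtureBatchNorm2d (not part of the original file): each sample receives its own affine parameters as an attention-weighted mixture of k learned per-channel (weight, bias) pairs:

    import torch
    from models.ops.mixture_batchnorm import MixtureBatchNorm2d

    mbn = MixtureBatchNorm2d(num_channels=16, k=4)
    x = torch.randn(8, 16, 14, 14)
    print(mbn(x).shape)                         # torch.Size([8, 16, 14, 14])
    # k mixture components, each a per-channel affine pair
    print(mbn.weight_.shape, mbn.bias_.shape)   # torch.Size([4, 16]) twice
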
--------------------------------------------------------------------------------
/models/ops/nms.py:
--------------------------------------------------------------------------------
1 | from models.ops import _C
2 |
3 | from apex import amp
4 |
5 | # Only valid with fp32 inputs - give AMP the hint
6 | nms = amp.float_function(_C.nms)
7 | ml_nms = amp.float_function(_C.ml_nms)
8 |
9 | # nms.__doc__ = """
10 | # This function performs non-maximum suppression"""
11 |
--------------------------------------------------------------------------------
/models/ops/pool_points_interp.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.autograd import Function
4 | from torch.autograd.function import once_differentiable
5 |
6 | from models.ops import _C
7 |
8 | from apex import amp
9 |
10 |
11 | class _PoolPointsInterp(Function):
12 | @staticmethod
13 | def forward(ctx, input, roi, spatial_scale):
14 | ctx.save_for_backward(roi)
15 | ctx.spatial_scale = spatial_scale
16 | ctx.input_shape = input.size()
17 | output = _C.pool_points_interp_forward(
18 | input, roi, spatial_scale)
19 | return output
20 |
21 | @staticmethod
22 | @once_differentiable
23 | def backward(ctx, grad_output):
24 | rois, = ctx.saved_tensors
25 | spatial_scale = ctx.spatial_scale
26 | bs, ch, h, w = ctx.input_shape
27 | grad_input = _C.pool_points_interp_backward(
28 | grad_output,
29 | rois,
30 | spatial_scale,
31 | bs,
32 | ch,
33 | h,
34 | w,
35 | )
36 | return grad_input, None, None
37 |
38 |
39 | pool_points_interp = _PoolPointsInterp.apply
40 |
41 |
42 | class PoolPointsInterp(nn.Module):
43 | def __init__(self, spatial_scale=1.0):
44 | super(PoolPointsInterp, self).__init__()
45 | self.spatial_scale = spatial_scale
46 |
47 | @amp.float_function
48 | def forward(self, input, rois):
49 | return pool_points_interp(input, rois, self.spatial_scale)
50 |
51 | def __repr__(self):
52 | tmpstr = self.__class__.__name__ + "("
53 | tmpstr += ", spatial_scale=" + str(self.spatial_scale)
54 | tmpstr += ")"
55 | return tmpstr
56 |
--------------------------------------------------------------------------------
/models/ops/scale.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 |
5 | class Scale(nn.Module):
6 | def __init__(self, init_value=1.0):
7 | super(Scale, self).__init__()
8 | self.scale = nn.Parameter(torch.FloatTensor([init_value]))
9 |
10 | def forward(self, input):
11 | return input * self.scale
12 |
--------------------------------------------------------------------------------
/models/ops/setup_rcnn.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import glob
4 | import os
5 |
6 | import torch
7 | from setuptools import find_packages
8 | from setuptools import setup
9 | from torch.utils.cpp_extension import CUDA_HOME
10 | from torch.utils.cpp_extension import CppExtension
11 | from torch.utils.cpp_extension import CUDAExtension
12 |
13 | requirements = ["torch", "torchvision"]
14 |
15 |
16 | def get_extensions():
17 | this_dir = os.path.dirname(os.path.abspath(__file__))
18 | extensions_dir = os.path.join(this_dir, "csrc")
19 |
20 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
21 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
22 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
23 |
24 | sources = main_file + source_cpu
25 | extension = CppExtension
26 |
27 | extra_compile_args = {"cxx": []}
28 | define_macros = []
29 |
30 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1":
31 | extension = CUDAExtension
32 | sources += source_cuda
33 | define_macros += [("WITH_CUDA", None)]
34 | extra_compile_args["nvcc"] = [
35 | "-DCUDA_HAS_FP16=1",
36 | "-D__CUDA_NO_HALF_OPERATORS__",
37 | "-D__CUDA_NO_HALF_CONVERSIONS__",
38 | "-D__CUDA_NO_HALF2_OPERATORS__",
39 | ]
40 |
41 | sources = [os.path.join(extensions_dir, s) for s in sources]
42 |
43 | include_dirs = [extensions_dir]
44 |
45 | ext_modules = [
46 | extension(
47 | "_C",
48 | sources,
49 | include_dirs=include_dirs,
50 | define_macros=define_macros,
51 | extra_compile_args=extra_compile_args,
52 | )
53 | ]
54 |
55 | return ext_modules
56 |
57 |
58 | setup(
59 | name="pet",
60 | ext_modules=get_extensions(),
61 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
62 | )
63 |
--------------------------------------------------------------------------------
/models/ops/setup_ssd.py:
--------------------------------------------------------------------------------
1 | from Cython.Build import cythonize
2 | from Cython.Distutils import build_ext
3 | from setuptools import Extension
4 | from setuptools import setup
5 |
6 | import numpy as np
7 |
8 |
9 | # Obtain the numpy include directory. This logic works across numpy versions.
10 | try:
11 | numpy_include = np.get_include()
12 | except AttributeError:
13 | numpy_include = np.get_numpy_include()
14 |
15 |
16 | ext_modules = [
17 | Extension(
18 | name='cython_bbox',
19 | sources=['cython_bbox.pyx'],
20 | extra_compile_args=['-Wno-cpp'],
21 | include_dirs=[numpy_include]
22 | ),
23 | Extension(
24 | name='cython_nms',
25 | sources=['cython_nms.pyx'],
26 | extra_compile_args=['-Wno-cpp'],
27 | include_dirs=[numpy_include]
28 | )
29 | ]
30 |
31 | setup(
32 | name='pet',
33 | ext_modules=cythonize(ext_modules)
34 | )
35 |
--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.autograd import Function
4 | from torch.autograd.function import once_differentiable
5 |
6 | from models.ops import _C
7 |
8 |
9 | # TODO: Use JIT to replace CUDA implementation in the future.
10 | class _SigmoidFocalLoss(Function):
11 | @staticmethod
12 | def forward(ctx, logits, targets, gamma, alpha):
13 | ctx.save_for_backward(logits, targets)
14 | num_classes = logits.shape[1]
15 | ctx.num_classes = num_classes
16 | ctx.gamma = gamma
17 | ctx.alpha = alpha
18 |
19 | losses = _C.sigmoid_focalloss_forward(
20 | logits, targets, num_classes, gamma, alpha
21 | )
22 | return losses
23 |
24 | @staticmethod
25 | @once_differentiable
26 | def backward(ctx, d_loss):
27 | logits, targets = ctx.saved_tensors
28 | num_classes = ctx.num_classes
29 | gamma = ctx.gamma
30 | alpha = ctx.alpha
31 | d_loss = d_loss.contiguous()
32 | d_logits = _C.sigmoid_focalloss_backward(
33 | logits, targets, d_loss, num_classes, gamma, alpha
34 | )
35 | return d_logits, None, None, None, None
36 |
37 |
38 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply
39 |
40 |
41 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha):
42 | num_classes = logits.shape[1]
43 | gamma = gamma[0]
44 | alpha = alpha[0]
45 | dtype = targets.dtype
46 | device = targets.device
47 | class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0)
48 |
49 | t = targets.unsqueeze(1)
50 | p = torch.sigmoid(logits)
51 | term1 = (1 - p) ** gamma * torch.log(p)
52 | term2 = p ** gamma * torch.log(1 - p)
53 | return -(t == class_range).float() * term1 * alpha - ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha)
54 |
55 |
56 | class SigmoidFocalLoss(nn.Module):
57 | def __init__(self, gamma, alpha):
58 | super(SigmoidFocalLoss, self).__init__()
59 | self.gamma = gamma
60 | self.alpha = alpha
61 |
62 | def forward(self, logits, targets):
63 | device = logits.device
64 | if logits.is_cuda:
65 | loss_func = sigmoid_focal_loss_cuda
66 | else:
67 | loss_func = sigmoid_focal_loss_cpu
68 |
69 | loss = loss_func(logits, targets, self.gamma, self.alpha)
70 | return loss.sum()
71 |
72 | def __repr__(self):
73 | tmpstr = self.__class__.__name__ + "("
74 | tmpstr += "gamma=" + str(self.gamma)
75 | tmpstr += ", alpha=" + str(self.alpha)
76 | tmpstr += ")"
77 | return tmpstr
78 |
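A CPU-only sketch of the focal-loss math (not part of the original file). Note that `sigmoid_focal_loss_cpu` indexes `gamma[0]` / `alpha[0]`, so when calling the helper directly the hyper-parameters are passed as one-element sequences; targets are 1-based class labels with 0 meaning background:

    import torch
    from models.ops.sigmoid_focal_loss import sigmoid_focal_loss_cpu

    logits = torch.randn(5, 80)                  # 5 anchors, 80 classes
    targets = torch.tensor([0, 3, 12, 0, 80])    # 0 = background, 1..80 = classes
    loss = sigmoid_focal_loss_cpu(logits, targets, gamma=[2.0], alpha=[0.25])
    print(loss.shape, loss.sum())                # per-anchor, per-class losses
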
--------------------------------------------------------------------------------
/models/ops/smooth_l1_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
5 | """
6 | very similar to the smooth_l1_loss from pytorch, but with
7 | the extra beta parameter
8 |
9 | Modified according to detectron2's fvcore,
10 | refer to https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py
11 | """
12 | if beta < 1e-5:
13 | # if beta == 0, then torch.where will result in nan gradients when
14 | # the chain rule is applied due to pytorch implementation details
15 | # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of
16 | # zeros, rather than "no gradient"). To avoid this issue, we define
17 | # small values of beta to be exactly l1 loss.
18 | loss = torch.abs(input - target)
19 | else:
20 | n = torch.abs(input - target)
21 | cond = n < beta
22 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
23 |
24 | if size_average:
25 | return loss.mean()
26 | return loss.sum()
27 |
28 |
29 | def smooth_l1_loss_LW(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, beta=1.0):
30 | """
31 | SmoothL1(x) = 0.5 * x^2 / beta if |x| < beta
32 | |x| - 0.5 * beta otherwise.
33 | 1 / N * sum_i alpha_out[i] * SmoothL1(alpha_in[i] * (y_hat[i] - y[i])).
34 | N is the number of batch elements in the input predictions
35 | """
36 | box_diff = bbox_pred - bbox_targets
37 | in_box_diff = bbox_inside_weights * box_diff
38 | abs_in_box_diff = torch.abs(in_box_diff)
39 | smoothL1_sign = (abs_in_box_diff < beta).detach().float()
40 | in_loss_box = smoothL1_sign * 0.5 * torch.pow(in_box_diff, 2) / beta + \
41 | (1 - smoothL1_sign) * (abs_in_box_diff - (0.5 * beta))
42 | out_loss_box = bbox_outside_weights * in_loss_box
43 | loss_box = out_loss_box
44 | N = loss_box.size(0) # batch size
45 | loss_box = loss_box.view(-1).sum(0) / N
46 | return loss_box
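
A tiny numerical check of the beta behaviour (a sketch, not part of the original file): residuals below beta are penalized quadratically, residuals above it linearly, and a near-zero beta degenerates to plain L1:

    import torch
    from models.ops.smooth_l1_loss import smooth_l1_loss

    x = torch.tensor([0.05, 0.5])
    y = torch.zeros(2)

    # |0.05| <  beta=1/9 -> quadratic: 0.5 * 0.05**2 / (1/9) = 0.01125
    # |0.5|  >= beta     -> linear:    0.5 - 0.5 * (1/9)     = 0.44444...
    print(smooth_l1_loss(x, y, size_average=False))              # tensor(0.4557)
    print(smooth_l1_loss(x, y, beta=1e-9, size_average=False))   # tensor(0.5500), plain L1
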
--------------------------------------------------------------------------------
/models/ops/squeeze_excitation.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 |
4 | class SeConv2d(nn.Module):
5 | def __init__(self, inplanes, innerplanes, activation=nn.ReLU):
6 | super(SeConv2d, self).__init__()
7 | self.avg_pool = nn.AdaptiveAvgPool2d(1)
8 | self.conv = nn.Sequential(
9 | nn.Conv2d(inplanes, innerplanes, kernel_size=1),
10 | activation(),
11 | nn.Conv2d(innerplanes, inplanes, kernel_size=1),
12 | nn.Sigmoid()
13 | )
14 | self.reset_parameters()
15 |
16 | def reset_parameters(self):
17 | # weight initialization
18 | for m in self.modules():
19 | if isinstance(m, nn.Conv2d):
20 | nn.init.constant_(m.weight, 0)
21 | if m.bias is not None:
22 | nn.init.zeros_(m.bias)
23 |
24 | def forward(self, x):
25 | n, c, _, _ = x.size()
26 | y = self.avg_pool(x)
27 | y = self.conv(y)
28 | return x * y
29 |
30 |
31 | class GDWSe2d(nn.Module):
32 | def __init__(self, inplanes, kernel=3, reduction=16, with_padding=False):
33 | super(GDWSe2d, self).__init__()
34 | if with_padding:
35 | padding = kernel // 2
36 | else:
37 | padding = 0
38 |
39 | self.globle_dw = nn.Conv2d(inplanes, inplanes, kernel_size=kernel, padding=padding, stride=1,
40 | groups=inplanes, bias=False)
41 | self.bn = nn.BatchNorm2d(inplanes)
42 | self.relu = nn.ReLU(inplace=True)
43 |
44 | self.avg_pool = nn.AdaptiveAvgPool2d(1)
45 | self.fc = nn.Sequential(
46 | nn.Linear(inplanes, inplanes // reduction),
47 | nn.ReLU(inplace=True),
48 | nn.Linear(inplanes // reduction, inplanes),
49 | nn.Sigmoid()
50 | )
51 |
52 | self._init_weights()
53 |
54 | def _init_weights(self):
55 | # weight initialization
56 | for m in self.modules():
57 | if isinstance(m, nn.Conv2d):
58 | nn.init.kaiming_normal_(m.weight, mode='fan_out')
59 | if m.bias is not None:
60 | nn.init.zeros_(m.bias)
61 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
62 | nn.init.constant_(m.weight, 1)
63 | nn.init.constant_(m.bias, 0)
64 | elif isinstance(m, nn.Linear):
65 | nn.init.normal_(m.weight, 0, 0.01)
66 | nn.init.constant_(m.bias, 0)
67 |
68 | def forward(self, x):
69 | y = self.globle_dw(x)
70 | y = self.bn(y)
71 | y = self.relu(y)
72 |
73 | n, c, _, _ = x.size()
74 | y = self.avg_pool(y).view(n, c)
75 | y = self.fc(y).view(n, c, 1, 1)
76 | return x * y.expand_as(x)
77 |
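A short usage sketch of the SE block (not part of the original file). Because reset_parameters zero-initializes the two 1x1 convs, a freshly constructed SeConv2d starts as a constant sigmoid(0) = 0.5 gate, i.e. it initially halves its input:

    import torch
    from models.ops.squeeze_excitation import SeConv2d

    se = SeConv2d(32, 8)                 # inplanes=32, bottleneck width 8
    x = torch.randn(2, 32, 16, 16)
    y = se(x)
    print(y.shape)                       # torch.Size([2, 32, 16, 16])
    assert torch.allclose(y, 0.5 * x)    # zero-initialized gate -> 0.5 everywhere
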
--------------------------------------------------------------------------------
/rcnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/__init__.py
--------------------------------------------------------------------------------
/rcnn/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/core/__init__.py
--------------------------------------------------------------------------------
/rcnn/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .transform import build_transforms
2 | from .dataset import build_dataset
3 | from .dataset import make_train_data_loader
4 | from .dataset import make_test_data_loader
5 | from .evaluation import evaluation
6 | from .evaluation import post_processing
--------------------------------------------------------------------------------
/rcnn/datasets/dataset_catalog.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 |
3 | from utils.data.dataset_catalog import COMMON_DATASETS
4 |
5 | # Root directory of project
6 | ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
7 |
8 | # Path to data dir
9 | _DATA_DIR = osp.abspath(osp.join(ROOT_DIR, 'data'))
10 |
11 | # Required dataset entry keys
12 | _IM_DIR = 'image_directory'
13 | _ANN_FN = 'annotation_file'
14 |
15 | # Available datasets
16 | _DATASETS = {
17 | }
18 | _DATASETS.update(COMMON_DATASETS)
19 |
20 |
21 | def datasets():
22 | """Retrieve the list of available dataset names."""
23 | return _DATASETS.keys()
24 |
25 |
26 | def contains(name):
27 | """Determine if the dataset is in the catalog."""
28 | return name in _DATASETS.keys()
29 |
30 |
31 | def get_im_dir(name):
32 | """Retrieve the image directory for the dataset."""
33 | return _DATASETS[name][_IM_DIR]
34 |
35 |
36 | def get_ann_fn(name):
37 | """Retrieve the annotation file for the dataset."""
38 | return _DATASETS[name][_ANN_FN]
39 |
--------------------------------------------------------------------------------
/rcnn/datasets/transform.py:
--------------------------------------------------------------------------------
1 | from utils.data import transforms as T
2 |
3 | from rcnn.core.config import cfg
4 |
5 |
6 | def build_transforms(is_train=True):
7 | if is_train:
8 | min_size = cfg.TRAIN.SCALES
9 | max_size = cfg.TRAIN.MAX_SIZE
10 | flip_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN
11 | brightness = cfg.TRAIN.BRIGHTNESS
12 | contrast = cfg.TRAIN.CONTRAST
13 | saturation = cfg.TRAIN.SATURATION
14 | hue = cfg.TRAIN.HUE
15 | left_right = cfg.TRAIN.LEFT_RIGHT
16 |
17 | # for force resize
18 | force_test_scale = [-1, -1]
19 | scale_ratios = cfg.TRAIN.RANDOM_CROP.SCALE_RATIOS
20 |
21 | # for random crop
22 | preprocess_type = cfg.TRAIN.PREPROCESS_TYPE
23 |
24 | crop_sizes = cfg.TRAIN.RANDOM_CROP.CROP_SCALES
25 | crop_iou_ths = cfg.TRAIN.RANDOM_CROP.IOU_THS
26 | pad_pixel = cfg.TRAIN.RANDOM_CROP.PAD_PIXEL
27 | pad_pixel = (cfg.PIXEL_MEANS if len(pad_pixel) < 3 else pad_pixel)
28 | else:
29 | min_size = cfg.TEST.SCALE
30 | max_size = cfg.TEST.MAX_SIZE
31 | flip_prob = 0
32 | brightness = 0.0
33 | contrast = 0.0
34 | saturation = 0.0
35 | hue = 0.0
36 | left_right = ()
37 |
38 | # for force resize
39 | force_test_scale = cfg.TEST.FORCE_TEST_SCALE
40 | scale_ratios = ()
41 |
42 | # for random crop
43 | preprocess_type = "none"
44 |
45 | crop_sizes = ()
46 | pad_pixel = ()
47 | crop_iou_ths = ()
48 |
49 | to_bgr255 = cfg.TO_BGR255
50 | normalize_transform = T.Normalize(
51 | mean=cfg.PIXEL_MEANS, std=cfg.PIXEL_STDS, to_bgr255=to_bgr255
52 | )
53 |
54 | color_jitter = T.ColorJitter(
55 | brightness=brightness,
56 | contrast=contrast,
57 | saturation=saturation,
58 | hue=hue,
59 | )
60 |
61 | transform = T.Compose(
62 | [
63 | color_jitter,
64 | T.Resize(min_size, max_size, preprocess_type, scale_ratios, force_test_scale),
65 | T.RandomCrop(preprocess_type, crop_sizes, pad_pixel, crop_iou_ths),
66 | T.RandomHorizontalFlip(flip_prob, left_right),
67 | T.ToTensor(),
68 | normalize_transform,
69 | ]
70 | )
71 | return transform
72 |
--------------------------------------------------------------------------------
/rcnn/modeling/backbone/MobileNet_v2.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch.nn as nn
4 |
5 | import models.imagenet.mobilenet_v2 as mv2
6 | import models.ops as ops
7 | from models.imagenet.utils import make_divisible
8 | from utils.net import freeze_params, make_norm
9 | from rcnn.modeling import registry
10 | from rcnn.core.config import cfg
11 |
12 |
13 | class MobileNetV2(mv2.MobileNetV2):
14 | def __init__(self, norm='bn', activation=nn.ReLU6, stride=32):
15 | """ Constructor
16 | """
17 | super(MobileNetV2, self).__init__()
18 | block = mv2.LinearBottleneck
19 | self.use_se = cfg.BACKBONE.MV2.USE_SE
20 | self.widen_factor = cfg.BACKBONE.MV2.WIDEN_FACTOR
21 | self.norm = norm
22 | self.activation_type = activation
23 | try:
24 | self.activation = activation(inplace=True)
25 | except:
26 | self.activation = activation()
27 | self.stride = stride
28 |
29 | layers_cfg = mv2.model_se(mv2.MV2_CFG['A']) if self.use_se else mv2.MV2_CFG['A']
30 | num_of_channels = [lc[-1][1] for lc in layers_cfg[1:-1]]
31 | self.channels = [make_divisible(ch * self.widen_factor, 8) for ch in num_of_channels]
32 | self.layers = [len(lc) for lc in layers_cfg[2:-1]]
33 |
34 | self.inplanes = make_divisible(layers_cfg[0][0][1] * self.widen_factor, 8)
35 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=layers_cfg[0][0][0], stride=layers_cfg[0][0][4],
36 | padding=layers_cfg[0][0][0] // 2, bias=False)
37 | self.bn1 = make_norm(self.inplanes, norm=self.norm)
38 |
39 | self.layer0 = self._make_layer(block, layers_cfg[1], dilation=1)
40 | self.layer1 = self._make_layer(block, layers_cfg[2], dilation=1)
41 | self.layer2 = self._make_layer(block, layers_cfg[3], dilation=1)
42 | self.layer3 = self._make_layer(block, layers_cfg[4], dilation=1)
43 | self.layer4 = self._make_layer(block, layers_cfg[5], dilation=1)
44 |
45 | self.spatial_scale = [1 / 4., 1 / 8., 1 / 16., 1 / 32.]
46 | self.dim_out = self.stage_out_dim[1:int(math.log(self.stride, 2))]
47 |
48 | del self.conv_out
49 | del self.bn_out
50 | del self.avgpool
51 | del self.fc
52 | self._init_weights()
53 | self._init_modules()
54 |
55 | def _init_modules(self):
56 | assert cfg.BACKBONE.MV2.FREEZE_AT in [0, 2, 3, 4, 5] # cfg.BACKBONE.MV2.FREEZE_AT: 2
57 | assert cfg.BACKBONE.MV2.FREEZE_AT <= len(self.layers) + 1
58 | if cfg.BACKBONE.MV2.FREEZE_AT > 0:
59 | freeze_params(getattr(self, 'conv1'))
60 | freeze_params(getattr(self, 'bn1'))
61 | for i in range(0, cfg.BACKBONE.MV2.FREEZE_AT):
62 | freeze_params(getattr(self, 'layer%d' % i))
63 | # Freeze all bn (affine) layers !!!
64 | self.apply(lambda m: freeze_params(m) if isinstance(m, ops.AffineChannel2d) else None)
65 |
66 | def train(self, mode=True):
67 | # Override train mode
68 | self.training = mode
69 | if cfg.BACKBONE.MV2.FREEZE_AT < 1:
70 | getattr(self, 'conv1').train(mode)
71 | getattr(self, 'bn1').train(mode)
72 | for i in range(cfg.BACKBONE.MV2.FREEZE_AT, len(self.layers) + 1):
73 | getattr(self, 'layer%d' % i).train(mode)
74 |
75 | def forward(self, x):
76 | x = self.conv1(x)
77 | x = self.bn1(x)
78 | x = self.activation(x)
79 |
80 | x = self.layer0(x)
81 | x2 = self.layer1(x)
82 | x3 = self.layer2(x2)
83 | x4 = self.layer3(x3)
84 | x5 = self.layer4(x4)
85 |
86 | return [x2, x3, x4, x5]
87 |
88 |
89 | # ---------------------------------------------------------------------------- #
90 | # MobileNetV2 Conv Body
91 | # ---------------------------------------------------------------------------- #
92 | @registry.BACKBONES.register("mobilenet_v2")
93 | def mobilenet_v2():
94 | model = MobileNetV2()
95 | return model
96 |
--------------------------------------------------------------------------------
/rcnn/modeling/backbone/MobileNet_v3.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | import models.imagenet.mobilenet_v3 as mv3
7 | import models.ops as ops
8 | from models.imagenet.utils import make_divisible, convert_conv2convsamepadding_model
9 | from utils.net import freeze_params, make_norm
10 | from rcnn.modeling import registry
11 | from rcnn.core.config import cfg
12 |
13 |
14 | class MobileNetV3(mv3.MobileNetV3):
15 | def __init__(self, norm='bn', activation=mv3.H_Swish, stride=32):
16 | """ Constructor
17 | """
18 | super(MobileNetV3, self).__init__()
19 | block = mv3.LinearBottleneck
20 | self.widen_factor = cfg.BACKBONE.MV3.WIDEN_FACTOR
21 | self.norm = norm
22 | self.se_reduce_mid = cfg.BACKBONE.MV3.SE_REDUCE_MID
23 | self.se_divisible = cfg.BACKBONE.MV3.SE_DIVISIBLE
24 | self.head_use_bias = cfg.BACKBONE.MV3.HEAD_USE_BIAS
25 | self.force_residual = cfg.BACKBONE.MV3.FORCE_RESIDUAL
26 | self.sync_se_act = cfg.BACKBONE.MV3.SYNC_SE_ACT
27 | self.bn_eps = cfg.BACKBONE.BN_EPS
28 | self.activation_type = activation
29 | self.stride = stride
30 |
31 | setting = cfg.BACKBONE.MV3.SETTING
32 | layers_cfg = mv3.MV3_CFG[setting]
33 | num_of_channels = [lc[-1][1] for lc in layers_cfg[1:-1]]
34 | self.channels = [make_divisible(ch * self.widen_factor, 8) for ch in num_of_channels]
35 | self.activation = activation() if layers_cfg[0][0][3] else nn.ReLU(inplace=True)
36 | self.layers = [len(lc) for lc in layers_cfg[2:-1]]
37 |
38 | self.inplanes = make_divisible(layers_cfg[0][0][1] * self.widen_factor, 8)
39 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=layers_cfg[0][0][0], stride=layers_cfg[0][0][4],
40 | padding=layers_cfg[0][0][0] // 2, bias=False)
41 | self.bn1 = make_norm(self.inplanes, norm=self.norm, eps=self.bn_eps)
42 |
43 | self.layer0 = self._make_layer(block, layers_cfg[1], dilation=1) if layers_cfg[1][0][0] else None
44 | self.layer1 = self._make_layer(block, layers_cfg[2], dilation=1)
45 | self.layer2 = self._make_layer(block, layers_cfg[3], dilation=1)
46 | self.layer3 = self._make_layer(block, layers_cfg[4], dilation=1)
47 | self.layer4 = self._make_layer(block, layers_cfg[5], dilation=1)
48 |
49 | self.spatial_scale = [1 / 4., 1 / 8., 1 / 16., 1 / 32.]
50 | self.dim_out = self.stage_out_dim[1:int(math.log(self.stride, 2))]
51 |
52 | del self.last_stage
53 | del self.avgpool
54 | del self.conv_out
55 | del self.fc
56 | self._init_weights()
57 | self._init_modules()
58 |
59 | def _init_modules(self):
60 | assert cfg.BACKBONE.MV3.FREEZE_AT in [0, 2, 3, 4, 5] # cfg.BACKBONE.MV3.FREEZE_AT: 2
61 | assert cfg.BACKBONE.MV3.FREEZE_AT <= len(self.layers) + 1
62 | if cfg.BACKBONE.MV3.FREEZE_AT > 0:
63 | freeze_params(getattr(self, 'conv1'))
64 | freeze_params(getattr(self, 'bn1'))
65 | for i in range(0, cfg.BACKBONE.MV3.FREEZE_AT):
66 | if i == 0:
67 | freeze_params(getattr(self, 'layer0')) if self.layer0 is not None else None
68 | else:
69 | freeze_params(getattr(self, 'layer%d' % i))
70 | # Freeze all bn (affine) layers !!!
71 | self.apply(lambda m: freeze_params(m) if isinstance(m, ops.AffineChannel2d) else None)
72 |
73 | def train(self, mode=True):
74 | # Override train mode
75 | self.training = mode
76 | if cfg.BACKBONE.MV3.FREEZE_AT < 1:
77 | getattr(self, 'conv1').train(mode)
78 | getattr(self, 'bn1').train(mode)
79 | for i in range(cfg.BACKBONE.MV3.FREEZE_AT, len(self.layers) + 1):
80 | if i == 0:
81 | getattr(self, 'layer0').train(mode) if self.layer0 is not None else None
82 | else:
83 | getattr(self, 'layer%d' % i).train(mode)
84 |
85 | def forward(self, x):
86 | x = self.conv1(x)
87 | x = self.bn1(x)
88 | x = self.activation(x)
89 |
90 | if self.layer0 is not None:
91 | x = self.layer0(x)
92 | x2 = self.layer1(x)
93 | x3 = self.layer2(x2)
94 | x4 = self.layer3(x3)
95 | x5 = self.layer4(x4)
96 |
97 | return [x2, x3, x4, x5]
98 |
99 |
100 | # ---------------------------------------------------------------------------- #
101 | # MobileNet V3 Conv Body
102 | # ---------------------------------------------------------------------------- #
103 | @registry.BACKBONES.register("mobilenet_v3")
104 | def mobilenet_v3():
105 | model = MobileNetV3()
106 | if cfg.BACKBONE.MV3.SAME_PAD:
107 | model = convert_conv2convsamepadding_model(model)
108 | return model
109 |
--------------------------------------------------------------------------------
/rcnn/modeling/backbone/VoVNet.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch.nn as nn
4 |
5 | import models.imagenet.vovnet as vov
6 | import models.ops as ops
7 | from utils.net import freeze_params, make_norm
8 | from rcnn.modeling import registry
9 | from rcnn.core.config import cfg
10 |
11 |
12 | def get_norm():
13 | norm = 'bn'
14 | if cfg.BACKBONE.VOV.USE_GN:
15 | norm = 'gn'
16 | return norm
17 |
18 |
19 | class VoVNet(vov.VoVNet):
20 | def __init__(self, norm='bn', stride=32):
21 | """ Constructor
22 | """
23 | super(VoVNet, self).__init__()
24 | block = vov.OSABlock
25 | self.num_conv = cfg.BACKBONE.VOV.NUM_CONV # 5
26 | self.norm = norm
27 | self.stride = stride
28 |
29 | base_width = cfg.BACKBONE.VOV.WIDTH # 64
30 | stage_dims = cfg.BACKBONE.VOV.STAGE_DIMS
31 | concat_dims = cfg.BACKBONE.VOV.CONCAT_DIMS
32 | layers = cfg.BACKBONE.VOV.LAYERS
33 | self.layers = layers
34 | stage_with_conv = cfg.BACKBONE.VOV.STAGE_WITH_CONV
35 | self.channels = [base_width] + list(concat_dims)
36 |
37 | self.inplanes = base_width
38 | self.conv1 = nn.Conv2d(3, self.inplanes, 3, 2, 1, bias=False)
39 | self.bn1 = make_norm(self.inplanes, norm=self.norm)
40 | self.conv2 = nn.Conv2d(self.inplanes, self.inplanes, 3, 1, 1, bias=False)
41 | self.bn2 = make_norm(self.inplanes, norm=self.norm)
42 | self.conv3 = nn.Conv2d(self.inplanes, self.inplanes * 2, 3, 2, 1, bias=False)
43 | self.bn3 = make_norm(self.inplanes * 2, norm=self.norm)
44 | self.relu = nn.ReLU(inplace=True)
45 | self.inplanes = self.inplanes * 2
46 |
47 | self.layer1 = self._make_layer(block, stage_dims[0], concat_dims[0], layers[0], 1, conv=stage_with_conv[0])
48 | self.layer2 = self._make_layer(block, stage_dims[1], concat_dims[1], layers[1], 2, conv=stage_with_conv[1])
49 | self.layer3 = self._make_layer(block, stage_dims[2], concat_dims[2], layers[2], 2, conv=stage_with_conv[2])
50 | self.layer4 = self._make_layer(block, stage_dims[3], concat_dims[3], layers[3], 2, conv=stage_with_conv[3])
51 |
52 | self.spatial_scale = [1 / 4., 1 / 8., 1 / 16., 1 / 32.]
53 | self.dim_out = self.stage_out_dim[1:int(math.log(self.stride, 2))]
54 |
55 | del self.avgpool
56 | del self.fc
57 | self._init_weights()
58 | self._init_modules()
59 |
60 | def _init_modules(self):
61 | assert cfg.BACKBONE.VOV.FREEZE_AT in [0, 2, 3, 4, 5] # cfg.BACKBONE.VOV.FREEZE_AT: 2
62 | assert cfg.BACKBONE.VOV.FREEZE_AT <= len(self.layers) + 1
63 | if cfg.BACKBONE.VOV.FREEZE_AT > 0:
64 | freeze_params(getattr(self, 'conv1'))
65 | freeze_params(getattr(self, 'bn1'))
66 | freeze_params(getattr(self, 'conv2'))
67 | freeze_params(getattr(self, 'bn2'))
68 | freeze_params(getattr(self, 'conv3'))
69 | freeze_params(getattr(self, 'bn3'))
70 | for i in range(1, cfg.BACKBONE.VOV.FREEZE_AT):
71 | freeze_params(getattr(self, 'layer%d' % i))
72 | # Freeze all bn (affine) layers !!!
73 | self.apply(lambda m: freeze_params(m) if isinstance(m, ops.AffineChannel2d) else None)
74 |
75 | def train(self, mode=True):
76 | # Override train mode
77 | self.training = mode
78 | if cfg.BACKBONE.VOV.FREEZE_AT < 1:
79 | getattr(self, 'conv1').train(mode)
80 | getattr(self, 'bn1').train(mode)
81 | getattr(self, 'conv2').train(mode)
82 | getattr(self, 'bn2').train(mode)
83 | getattr(self, 'conv3').train(mode)
84 | getattr(self, 'bn3').train(mode)
85 | for i in range(cfg.BACKBONE.VOV.FREEZE_AT, len(self.layers) + 1):
86 | if i == 0:
87 | continue
88 | getattr(self, 'layer%d' % i).train(mode)
89 |
90 | def forward(self, x):
91 | x = self.conv1(x)
92 | x = self.bn1(x)
93 | x = self.relu(x)
94 | x = self.conv2(x)
95 | x = self.bn2(x)
96 | x = self.relu(x)
97 | x = self.conv3(x)
98 | x = self.bn3(x)
99 | x = self.relu(x)
100 |
101 | x2 = self.layer1(x)
102 | x3 = self.layer2(x2)
103 | x4 = self.layer3(x3)
104 | x5 = self.layer4(x4)
105 |
106 | return [x2, x3, x4, x5]
107 |
108 | # ---------------------------------------------------------------------------- #
109 | # VoVNet Conv Body
110 | # ---------------------------------------------------------------------------- #
111 | @registry.BACKBONES.register("vovnet")
112 | def vovnet():
113 | model = VoVNet(norm=get_norm())
114 | return model
115 |
--------------------------------------------------------------------------------
/rcnn/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | from .HRNet import *
2 | from .MobileNet_v1 import *
3 | from .MobileNet_v2 import *
4 | from .MobileNet_v3 import *
5 | from .ResNet import *
6 | from .ResNeXt import *
7 | from .VoVNet import *
8 |
--------------------------------------------------------------------------------
/rcnn/modeling/cascade_rcnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/cascade_rcnn/__init__.py
--------------------------------------------------------------------------------
/rcnn/modeling/cascade_rcnn/cascade_rcnn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | from rcnn.modeling.cascade_rcnn import heads
5 | from rcnn.modeling.cascade_rcnn import outputs
6 | from rcnn.modeling.cascade_rcnn.inference import box_post_processor
7 | from rcnn.modeling.cascade_rcnn.loss import box_loss_evaluator
8 | from rcnn.modeling import registry
9 | from rcnn.core.config import cfg
10 |
11 |
12 | class CascadeRCNN(torch.nn.Module):
13 | """
14 | Generic Box Head class.
15 | """
16 | def __init__(self, dim_in, spatial_scale):
17 | super(CascadeRCNN, self).__init__()
18 | self.num_stage = cfg.CASCADE_RCNN.NUM_STAGE
19 | self.test_stage = cfg.CASCADE_RCNN.TEST_STAGE
20 | self.stage_loss_weights = cfg.CASCADE_RCNN.STAGE_WEIGHTS
21 | self.test_ensemble = cfg.CASCADE_RCNN.TEST_ENSEMBLE
22 |
23 | head = registry.ROI_CASCADE_HEADS[cfg.CASCADE_RCNN.ROI_BOX_HEAD]
24 | output = registry.ROI_CASCADE_OUTPUTS[cfg.CASCADE_RCNN.ROI_BOX_OUTPUT]
25 |
26 | for stage in range(1, self.num_stage + 1):
27 | stage_name = '_{}'.format(stage)
28 | setattr(self, 'Box_Head' + stage_name, head(dim_in, spatial_scale))
29 | setattr(self, 'Output' + stage_name, output(getattr(self, 'Box_Head' + stage_name).dim_out))
30 |
31 | def forward(self, features, proposals, targets=None):
32 | if self.training:
33 | return self._forward_train(features, proposals, targets)
34 | else:
35 | return self._forward_test(features, proposals)
36 |
37 | def _forward_train(self, features, proposals, targets=None):
38 | all_loss = dict()
39 | for i in range(self.num_stage):
40 | head = getattr(self, 'Box_Head_{}'.format(i + 1))
41 | output = getattr(self, 'Output_{}'.format(i + 1))
42 | loss_evaluator = box_loss_evaluator(i)
43 |
44 | # Cascade R-CNN subsamples during training the proposals with a fixed
45 | # positive / negative ratio
46 | with torch.no_grad():
47 | proposals = loss_evaluator.subsample(proposals, targets)
48 |
49 | # extract features that will be fed to the final classifier. The
50 | # feature_extractor generally corresponds to the pooler + heads
51 | x = head(features, proposals)
52 | # final classifier that converts the features into predictions
53 | class_logits, box_regression = output(x)
54 |
55 | loss_classifier, loss_box_reg = loss_evaluator([class_logits], [box_regression])
56 | loss_scalar = self.stage_loss_weights[i]
57 | all_loss['s{}_cls_loss'.format(i + 1)] = loss_classifier * loss_scalar
58 | all_loss['s{}_bbox_loss'.format(i + 1)] = loss_box_reg * loss_scalar
59 |
60 | with torch.no_grad():
61 | if i < self.num_stage - 1:
62 | post_processor_train = box_post_processor(i, is_train=True)
63 | proposals = post_processor_train((class_logits, box_regression), proposals, targets)
64 |
65 | return x, proposals, all_loss
66 |
67 | def _forward_test(self, features, proposals):
68 | ms_scores = []
69 | for i in range(self.num_stage):
70 | head = getattr(self, 'Box_Head_{}'.format(i + 1))
71 | output = getattr(self, 'Output_{}'.format(i + 1))
72 | post_processor_test = box_post_processor(i, is_train=False)
73 | # extract features that will be fed to the final classifier. The
74 | # feature_extractor generally corresponds to the pooler + heads
75 | x = head(features, proposals)
76 | # final classifier that converts the features into predictions
77 | class_logits, box_regression = output(x)
78 | ms_scores.append(class_logits)
79 |
80 | if i < self.test_stage - 1:
81 | proposals = post_processor_test((class_logits, box_regression), proposals)
82 | else:
83 | if self.test_ensemble:
84 | assert len(ms_scores) == self.test_stage
85 | class_logits = sum(ms_scores) / self.test_stage
86 | result = post_processor_test((class_logits, box_regression), proposals)
87 | return x, result, {}
88 |
--------------------------------------------------------------------------------
/rcnn/modeling/cascade_rcnn/heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .convfc_heads import *
2 | from .mlp_heads import *
3 |
4 |
--------------------------------------------------------------------------------
/rcnn/modeling/cascade_rcnn/heads/convfc_heads.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 |
4 | from models.imagenet.utils import convert_conv2convws_model
5 | from utils.net import make_conv, make_fc
6 | from rcnn.utils.poolers import Pooler
7 | from rcnn.modeling import registry
8 | from rcnn.core.config import cfg
9 |
10 |
11 | @registry.ROI_CASCADE_HEADS.register("roi_xconv1fc_head")
12 | class roi_xconv1fc_head(nn.Module):
13 | """Add a X conv + 1fc head"""
14 |
15 | def __init__(self, dim_in, spatial_scale):
16 | super().__init__()
17 | self.dim_in = dim_in[-1]
18 |
19 | method = cfg.FAST_RCNN.ROI_XFORM_METHOD
20 | resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
21 | sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO
22 | pooler = Pooler(
23 | method=method,
24 | output_size=resolution,
25 | scales=spatial_scale,
26 | sampling_ratio=sampling_ratio,
27 | )
28 | self.pooler = pooler
29 |
30 | use_lite = cfg.FAST_RCNN.CONVFC_HEAD.USE_LITE
31 | use_bn = cfg.FAST_RCNN.CONVFC_HEAD.USE_BN
32 | use_gn = cfg.FAST_RCNN.CONVFC_HEAD.USE_GN
33 | conv_dim = cfg.FAST_RCNN.CONVFC_HEAD.CONV_DIM
34 | num_stacked_convs = cfg.FAST_RCNN.CONVFC_HEAD.NUM_STACKED_CONVS
35 | dilation = cfg.FAST_RCNN.CONVFC_HEAD.DILATION
36 |
37 | xconvs = []
38 | for ix in range(num_stacked_convs):
39 | xconvs.append(
40 | make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite,
41 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite, use_relu=True)
42 | )
43 | self.dim_in = conv_dim
44 | self.add_module("xconvs", nn.Sequential(*xconvs))
45 |
46 | input_size = self.dim_in * resolution[0] * resolution[1]
47 | mlp_dim = cfg.FAST_RCNN.CONVFC_HEAD.MLP_DIM
48 | self.fc6 = make_fc(input_size, mlp_dim, use_bn=False, use_gn=False)
49 | self.dim_out = mlp_dim
50 |
51 | if cfg.FAST_RCNN.CONVFC_HEAD.USE_WS:
52 | self = convert_conv2convws_model(self)
53 |
54 | def forward(self, x, proposals):
55 | x = self.pooler(x, proposals)
56 | x = self.xconvs(x)
57 | x = x.view(x.size(0), -1)
58 | x = F.relu(self.fc6(x), inplace=True)
59 |
60 | return x
61 |
--------------------------------------------------------------------------------
/rcnn/modeling/cascade_rcnn/heads/mlp_heads.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from models.imagenet.utils import convert_conv2convws_model
6 | from utils.net import make_fc
7 | from rcnn.utils.poolers import Pooler
8 | from rcnn.modeling import registry
9 | from rcnn.core.config import cfg
10 |
11 |
12 | @registry.ROI_CASCADE_HEADS.register("roi_2mlp_head")
13 | class roi_2mlp_head(nn.Module):
14 | """Add a ReLU MLP with two hidden layers."""
15 |
16 | def __init__(self, dim_in, spatial_scale):
17 | super().__init__()
18 | self.dim_in = dim_in[-1]
19 |
20 | method = cfg.FAST_RCNN.ROI_XFORM_METHOD
21 | resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
22 | sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO
23 | pooler = Pooler(
24 | method=method,
25 | output_size=resolution,
26 | scales=spatial_scale,
27 | sampling_ratio=sampling_ratio,
28 | )
29 | input_size = self.dim_in * resolution[0] * resolution[1]
30 | mlp_dim = cfg.FAST_RCNN.MLP_HEAD.MLP_DIM
31 | use_bn = cfg.FAST_RCNN.MLP_HEAD.USE_BN
32 | use_gn = cfg.FAST_RCNN.MLP_HEAD.USE_GN
33 | self.pooler = pooler
34 | self.fc6 = make_fc(input_size, mlp_dim, use_bn, use_gn)
35 | self.fc7 = make_fc(mlp_dim, mlp_dim, use_bn, use_gn)
36 | self.dim_out = mlp_dim
37 |
38 | if cfg.FAST_RCNN.MLP_HEAD.USE_WS:
39 | self = convert_conv2convws_model(self)
40 |
41 | def forward(self, x, proposals):
42 | x = self.pooler(x, proposals)
43 | x = x.view(x.size(0), -1)
44 |
45 | x = F.relu(self.fc6(x), inplace=True)
46 | x = F.relu(self.fc7(x), inplace=True)
47 |
48 | return x
49 |
--------------------------------------------------------------------------------
/rcnn/modeling/cascade_rcnn/outputs.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.init as init
3 |
4 | from rcnn.modeling import registry
5 | from rcnn.core.config import cfg
6 |
7 |
8 | # ---------------------------------------------------------------------------- #
9 | # R-CNN bbox branch outputs
10 | # ---------------------------------------------------------------------------- #
11 | @registry.ROI_CASCADE_OUTPUTS.register("box_output")
12 | class Box_output(nn.Module):
13 | def __init__(self, dim_in):
14 | super().__init__()
15 | self.dim_in = dim_in
16 |
17 | self.cls_score = nn.Linear(self.dim_in, cfg.MODEL.NUM_CLASSES)
18 | # self.avgpool = nn.AdaptiveAvgPool2d(1)
19 | if cfg.FAST_RCNN.CLS_AGNOSTIC_BBOX_REG: # bg and fg
20 | self.bbox_pred = nn.Linear(self.dim_in, 4 * 2)
21 | else:
22 | raise NotImplementedError
23 | # self.bbox_pred = nn.Linear(self.dim_in, 4 * cfg.MODEL.NUM_CLASSES)
24 |
25 | self._init_weights()
26 |
27 | def _init_weights(self):
28 | init.normal_(self.cls_score.weight, std=0.01)
29 | init.constant_(self.cls_score.bias, 0)
30 | init.normal_(self.bbox_pred.weight, std=0.001)
31 | init.constant_(self.bbox_pred.bias, 0)
32 |
33 | def forward(self, x):
34 | if x.ndimension() == 4:
35 | x = nn.functional.adaptive_avg_pool2d(x, 1)
36 | # x = self.avgpool(x)
37 | x = x.view(x.size(0), -1)
38 | cls_score = self.cls_score(x)
39 | bbox_pred = self.bbox_pred(x)
40 |
41 | return cls_score, bbox_pred
42 |
--------------------------------------------------------------------------------
/rcnn/modeling/fast_rcnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/fast_rcnn/__init__.py
--------------------------------------------------------------------------------
/rcnn/modeling/fast_rcnn/fast_rcnn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | from rcnn.modeling.fast_rcnn import heads
5 | from rcnn.modeling.fast_rcnn import outputs
6 | from rcnn.modeling.fast_rcnn.inference import box_post_processor
7 | from rcnn.modeling.fast_rcnn.loss import box_loss_evaluator
8 | from rcnn.modeling import registry
9 | from rcnn.core.config import cfg
10 |
11 |
12 | class FastRCNN(torch.nn.Module):
13 | """
14 | Generic Box Head class.
15 | """
16 |
17 | def __init__(self, dim_in, spatial_scale):
18 | super(FastRCNN, self).__init__()
19 | head = registry.ROI_BOX_HEADS[cfg.FAST_RCNN.ROI_BOX_HEAD]
20 | self.Head = head(dim_in, spatial_scale)
21 | output = registry.ROI_BOX_OUTPUTS[cfg.FAST_RCNN.ROI_BOX_OUTPUT]
22 | self.Output = output(self.Head.dim_out)
23 |
24 | self.post_processor = box_post_processor()
25 | self.loss_evaluator = box_loss_evaluator()
26 |
27 | def forward(self, features, proposals, targets=None):
28 | """
29 | Arguments:
30 | features (list[Tensor]): feature-maps from possibly several levels
31 | proposals (list[BoxList]): proposal boxes
32 | targets (list[BoxList], optional): the ground-truth targets.
33 |
34 | Returns:
35 | x (Tensor): the result of the feature extractor
36 | proposals (list[BoxList]): during training, the subsampled proposals
37 | are returned. During testing, the predicted boxlists are returned
38 | losses (dict[Tensor]): During training, returns the losses for the
39 | head. During testing, returns an empty dict.
40 | """
41 | if self.training:
42 | return self._forward_train(features, proposals, targets)
43 | else:
44 | return self._forward_test(features, proposals)
45 |
46 | def _forward_train(self, features, proposals, targets=None):
47 | # During training, Faster R-CNN subsamples the proposals with a fixed
48 | # positive / negative ratio
49 | with torch.no_grad():
50 | proposals = self.loss_evaluator.subsample(proposals, targets)
51 |
52 | # extract features that will be fed to the final classifier. The
53 | # feature_extractor generally corresponds to the pooler + heads
54 | x = self.Head(features, proposals)
55 | # final classifier that converts the features into predictions
56 | class_logits, box_regression = self.Output(x)
57 |
58 | losses = self.loss_evaluator([class_logits], [box_regression])
59 | return x, proposals, losses
60 |
61 | def _forward_test(self, features, proposals):
62 | # extract features that will be fed to the final classifier. The
63 | # feature_extractor generally corresponds to the pooler + heads
64 | x = self.Head(features, proposals)
65 | # final classifier that converts the features into predictions
66 | class_logits, box_regression = self.Output(x)
67 |
68 | result = self.post_processor((class_logits, box_regression), proposals)
69 | return x, result, {}
70 |
--------------------------------------------------------------------------------
/rcnn/modeling/fast_rcnn/heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .mlp_heads import *
2 | from .convfc_heads import *
3 |
--------------------------------------------------------------------------------
/rcnn/modeling/fast_rcnn/heads/convfc_heads.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 |
4 | from models.imagenet.utils import convert_conv2convws_model
5 | from utils.net import make_conv, make_fc
6 | from rcnn.utils.poolers import Pooler
7 | from rcnn.modeling import registry
8 | from rcnn.core.config import cfg
9 |
10 |
11 | @registry.ROI_BOX_HEADS.register("roi_xconv1fc_head")
12 | class roi_xconv1fc_head(nn.Module):
13 | """Add a X conv + 1fc head"""
14 |
15 | def __init__(self, dim_in, spatial_scale):
16 | super().__init__()
17 | self.dim_in = dim_in[-1]
18 |
19 | method = cfg.FAST_RCNN.ROI_XFORM_METHOD
20 | resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
21 | sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO
22 | pooler = Pooler(
23 | method=method,
24 | output_size=resolution,
25 | scales=spatial_scale,
26 | sampling_ratio=sampling_ratio,
27 | )
28 | self.pooler = pooler
29 |
30 | use_lite = cfg.FAST_RCNN.CONVFC_HEAD.USE_LITE
31 | use_bn = cfg.FAST_RCNN.CONVFC_HEAD.USE_BN
32 | use_gn = cfg.FAST_RCNN.CONVFC_HEAD.USE_GN
33 | conv_dim = cfg.FAST_RCNN.CONVFC_HEAD.CONV_DIM
34 | num_stacked_convs = cfg.FAST_RCNN.CONVFC_HEAD.NUM_STACKED_CONVS
35 | dilation = cfg.FAST_RCNN.CONVFC_HEAD.DILATION
36 |
37 | xconvs = []
38 | for ix in range(num_stacked_convs):
39 | xconvs.append(
40 | make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite,
41 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite, use_relu=True)
42 | )
43 | self.dim_in = conv_dim
44 | self.add_module("xconvs", nn.Sequential(*xconvs))
45 |
46 | input_size = self.dim_in * resolution[0] * resolution[1]
47 | mlp_dim = cfg.FAST_RCNN.CONVFC_HEAD.MLP_DIM
48 | self.fc6 = make_fc(input_size, mlp_dim, use_bn=False, use_gn=False)
49 | self.dim_out = mlp_dim
50 |
51 | if cfg.FAST_RCNN.CONVFC_HEAD.USE_WS:
52 | self = convert_conv2convws_model(self)
53 |
54 | def forward(self, x, proposals):
55 | x = self.pooler(x, proposals)
56 | x = self.xconvs(x)
57 | x = x.view(x.size(0), -1)
58 | x = F.relu(self.fc6(x), inplace=True)
59 |
60 | return x
61 |
--------------------------------------------------------------------------------
/rcnn/modeling/fast_rcnn/heads/mlp_heads.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from models.imagenet.utils import convert_conv2convws_model
6 | from utils.net import make_fc
7 | from rcnn.utils.poolers import Pooler
8 | from rcnn.modeling import registry
9 | from rcnn.core.config import cfg
10 |
11 |
12 | @registry.ROI_BOX_HEADS.register("roi_2mlp_head")
13 | class roi_2mlp_head(nn.Module):
14 | """Add a ReLU MLP with two hidden layers."""
15 |
16 | def __init__(self, dim_in, spatial_scale):
17 | super().__init__()
18 | self.dim_in = dim_in[-1]
19 |
20 | method = cfg.FAST_RCNN.ROI_XFORM_METHOD
21 | resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
22 | sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO
23 | pooler = Pooler(
24 | method=method,
25 | output_size=resolution,
26 | scales=spatial_scale,
27 | sampling_ratio=sampling_ratio,
28 | )
29 | input_size = self.dim_in * resolution[0] * resolution[1]
30 | mlp_dim = cfg.FAST_RCNN.MLP_HEAD.MLP_DIM
31 | use_bn = cfg.FAST_RCNN.MLP_HEAD.USE_BN
32 | use_gn = cfg.FAST_RCNN.MLP_HEAD.USE_GN
33 | self.pooler = pooler
34 | self.fc6 = make_fc(input_size, mlp_dim, use_bn, use_gn)
35 | self.fc7 = make_fc(mlp_dim, mlp_dim, use_bn, use_gn)
36 | self.dim_out = mlp_dim
37 |
38 | if cfg.FAST_RCNN.MLP_HEAD.USE_WS:
39 | self = convert_conv2convws_model(self)
40 |
41 | def forward(self, x, proposals):
42 | x = self.pooler(x, proposals)
43 | x = x.view(x.size(0), -1)
44 |
45 | x = F.relu(self.fc6(x), inplace=True)
46 | x = F.relu(self.fc7(x), inplace=True)
47 |
48 | return x
49 |
--------------------------------------------------------------------------------
/rcnn/modeling/fast_rcnn/outputs.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.init as init
3 |
4 | from rcnn.modeling import registry
5 | from rcnn.core.config import cfg
6 |
7 |
8 | # ---------------------------------------------------------------------------- #
9 | # R-CNN bbox branch outputs
10 | # ---------------------------------------------------------------------------- #
11 | @registry.ROI_BOX_OUTPUTS.register("box_output")
12 | class Box_output(nn.Module):
13 | def __init__(self, dim_in):
14 | super().__init__()
15 | self.dim_in = dim_in
16 | self.cls_on = cfg.FAST_RCNN.CLS_ON
17 | self.reg_on = cfg.FAST_RCNN.REG_ON
18 |
19 | if self.cls_on:
20 | self.cls_score = nn.Linear(self.dim_in, cfg.MODEL.NUM_CLASSES)
21 | init.normal_(self.cls_score.weight, std=0.01)
22 | init.constant_(self.cls_score.bias, 0)
23 | # self.avgpool = nn.AdaptiveAvgPool2d(1)
24 | if self.reg_on:
25 | if cfg.FAST_RCNN.CLS_AGNOSTIC_BBOX_REG: # bg and fg
26 | self.bbox_pred = nn.Linear(self.dim_in, 4 * 2)
27 | else:
28 | self.bbox_pred = nn.Linear(self.dim_in, 4 * cfg.MODEL.NUM_CLASSES)
29 | init.normal_(self.bbox_pred.weight, std=0.001)
30 | init.constant_(self.bbox_pred.bias, 0)
31 |
32 | def forward(self, x):
33 | if x.ndimension() == 4:
34 | x = nn.functional.adaptive_avg_pool2d(x, 1)
35 | # x = self.avgpool(x)
36 | x = x.view(x.size(0), -1)
37 | cls_score = self.cls_score(x) if self.cls_on else None
38 | bbox_pred = self.bbox_pred(x) if self.reg_on else None
39 |
40 | return cls_score, bbox_pred
41 |
--------------------------------------------------------------------------------
/rcnn/modeling/fpn/HRFPN.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from utils.net import make_conv
6 | from rcnn.core.config import cfg
7 | from rcnn.modeling import registry
8 |
9 |
10 | # ---------------------------------------------------------------------------- #
11 | # Functions for bolting HRFPN onto a backbone architecture
12 | # ---------------------------------------------------------------------------- #
13 | @registry.FPN_BODY.register("hrfpn")
14 | class hrfpn(nn.Module):
15 | # dim_in = [w, w * 2, w * 4, w * 8]
16 | # spatial_scale = [1/4, 1/8, 1/16, 1/32]
17 | def __init__(self, dim_in, spatial_scale):
18 | super().__init__()
19 | self.dim_in = sum(dim_in)
20 | self.spatial_scale = spatial_scale
21 |
22 | hrfpn_dim = cfg.FPN.HRFPN.DIM # 256
23 | use_lite = cfg.FPN.HRFPN.USE_LITE
24 | use_bn = cfg.FPN.HRFPN.USE_BN
25 | use_gn = cfg.FPN.HRFPN.USE_GN
26 | if cfg.FPN.HRFPN.POOLING_TYPE == 'AVG':
27 | self.pooling = F.avg_pool2d
28 | else:
29 | self.pooling = F.max_pool2d
30 | self.num_extra_pooling = cfg.FPN.HRFPN.NUM_EXTRA_POOLING # 1
31 | self.num_output = len(dim_in) + self.num_extra_pooling # 5
32 |
33 | self.reduction_conv = make_conv(self.dim_in, hrfpn_dim, kernel=1, use_bn=use_bn, use_gn=use_gn)
34 | self.dim_in = hrfpn_dim
35 |
36 | self.fpn_conv = nn.ModuleList()
37 | for i in range(self.num_output):
38 | self.fpn_conv.append(
39 | make_conv(self.dim_in, hrfpn_dim, kernel=3, use_dwconv=use_lite, use_bn=use_bn, use_gn=use_gn,
40 | suffix_1x1=use_lite)
41 | )
42 | self.dim_in = hrfpn_dim
43 |
44 | if self.num_extra_pooling:
45 | self.spatial_scale.append(self.spatial_scale[-1] * 0.5)
46 | self.dim_out = [self.dim_in for _ in range(self.num_output)]
47 | self._init_weights()
48 |
49 | def _init_weights(self):
50 | # weight initialization
51 | for m in self.modules():
52 | if isinstance(m, nn.Conv2d):
53 | nn.init.kaiming_uniform_(m.weight, a=1)
54 | if m.bias is not None:
55 | nn.init.zeros_(m.bias)
56 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
57 | nn.init.constant_(m.weight, 1)
58 | nn.init.constant_(m.bias, 0)
59 |
60 | def forward(self, x):
61 | outs = [x[0]]
62 | for i in range(1, len(x)):
63 | outs.append(F.interpolate(x[i], scale_factor=2**i, mode='bilinear'))
64 | out = torch.cat(outs, dim=1)
65 | out = self.reduction_conv(out)
66 |
67 | outs = [out]
68 | for i in range(1, self.num_output):
69 | outs.append(self.pooling(out, kernel_size=2**i, stride=2**i))
70 | fpn_output_blobs = []
71 | for i in range(self.num_output):
72 | fpn_output_blobs.append(self.fpn_conv[i](outs[i]))
73 |
74 | # use all levels
75 | return fpn_output_blobs # [P2 - P6]
76 |
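hrfpn above aggregates the HRNet branches by upsampling every branch to the resolution of the highest-resolution one, concatenating along channels, reducing with a 1x1 conv, and then pooling the reduced map with strides 2, 4, 8, 16 to rebuild a five-level pyramid. A stripped-down sketch of that flow with bare tensors (branch shapes and channel counts are illustrative, and make_conv is replaced by a plain Conv2d):

import torch
import torch.nn as nn
import torch.nn.functional as F

# Four HRNet-style branches at strides 4/8/16/32 (illustrative shapes).
channels = [32, 64, 128, 256]
feats = [torch.randn(1, c, 64 // 2 ** i, 64 // 2 ** i) for i, c in enumerate(channels)]

# 1) upsample every branch to the first branch's resolution and concatenate
outs = [feats[0]] + [F.interpolate(f, scale_factor=2 ** i, mode='bilinear',
                                   align_corners=False)
                     for i, f in enumerate(feats) if i > 0]
out = torch.cat(outs, dim=1)                          # (1, 480, 64, 64)

# 2) reduce channels, then pool to regenerate the pyramid levels
reduction = nn.Conv2d(sum(channels), 256, kernel_size=1)
out = reduction(out)
pyramid = [out] + [F.avg_pool2d(out, kernel_size=2 ** i, stride=2 ** i)
                   for i in range(1, 5)]              # analogue of P2 .. P6
print([p.shape[-1] for p in pyramid])                 # [64, 32, 16, 8, 4]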
--------------------------------------------------------------------------------
/rcnn/modeling/fpn/__init__.py:
--------------------------------------------------------------------------------
1 | from .FPN import *
2 | from .HRFPN import *
3 |
--------------------------------------------------------------------------------
/rcnn/modeling/keypoint_rcnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/keypoint_rcnn/__init__.py
--------------------------------------------------------------------------------
/rcnn/modeling/keypoint_rcnn/heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .convx_heads import *
2 | from .gce_heads import *
3 |
--------------------------------------------------------------------------------
/rcnn/modeling/keypoint_rcnn/heads/convx_heads.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from torch.nn import functional as F
3 |
4 | from utils.net import make_conv
5 | from rcnn.utils.poolers import Pooler
6 | from rcnn.modeling import registry
7 | from rcnn.core.config import cfg
8 |
9 |
10 | @registry.ROI_KEYPOINT_HEADS.register("roi_convx_head")
11 | class roi_convx_head(nn.Module):
12 | def __init__(self, dim_in, spatial_scale):
13 | super(roi_convx_head, self).__init__()
14 | self.dim_in = dim_in[-1]
15 |
16 | method = cfg.KRCNN.ROI_XFORM_METHOD
17 | resolution = cfg.KRCNN.ROI_XFORM_RESOLUTION
18 | sampling_ratio = cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO
19 | pooler = Pooler(
20 | method=method,
21 | output_size=resolution,
22 | scales=spatial_scale,
23 | sampling_ratio=sampling_ratio,
24 | )
25 | self.pooler = pooler
26 |
27 | use_lite = cfg.KRCNN.CONVX_HEAD.USE_LITE
28 | use_bn = cfg.KRCNN.CONVX_HEAD.USE_BN
29 | use_gn = cfg.KRCNN.CONVX_HEAD.USE_GN
30 | conv_dim = cfg.KRCNN.CONVX_HEAD.CONV_DIM
31 | num_stacked_convs = cfg.KRCNN.CONVX_HEAD.NUM_STACKED_CONVS
32 | dilation = cfg.KRCNN.CONVX_HEAD.DILATION
33 |
34 | self.blocks = []
35 | for layer_idx in range(num_stacked_convs):
36 | layer_name = "keypoint_fcn{}".format(layer_idx + 1)
37 | module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite,
38 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite)
39 | self.add_module(layer_name, module)
40 | self.dim_in = conv_dim
41 | self.blocks.append(layer_name)
42 | self.dim_out = self.dim_in
43 |
44 | def forward(self, x, proposals):
45 | x = self.pooler(x, proposals)
46 |
47 | for layer_name in self.blocks:
48 | x = F.relu(getattr(self, layer_name)(x))
49 |
50 | return x
51 |
--------------------------------------------------------------------------------
/rcnn/modeling/keypoint_rcnn/heads/gce_heads.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | from models.ops import interpolate, NonLocal2d
5 | from rcnn.core.config import cfg
6 | from rcnn.modeling import registry
7 | from rcnn.utils.poolers import Pooler
8 | from utils.net import make_conv
9 |
10 |
11 | @registry.ROI_KEYPOINT_HEADS.register("roi_gce_head")
12 | class roi_gce_head(nn.Module):
13 | def __init__(self, dim_in, spatial_scale):
14 | super(roi_gce_head, self).__init__()
15 | self.dim_in = dim_in[-1]
16 |
17 | method = cfg.KRCNN.ROI_XFORM_METHOD
18 | resolution = cfg.KRCNN.ROI_XFORM_RESOLUTION
19 | sampling_ratio = cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO
20 | pooler = Pooler(
21 | method=method,
22 | output_size=resolution,
23 | scales=spatial_scale,
24 | sampling_ratio=sampling_ratio,
25 | )
26 | self.pooler = pooler
27 |
28 | use_nl = cfg.KRCNN.GCE_HEAD.USE_NL
29 | use_bn = cfg.KRCNN.GCE_HEAD.USE_BN
30 | use_gn = cfg.KRCNN.GCE_HEAD.USE_GN
31 | conv_dim = cfg.KRCNN.GCE_HEAD.CONV_DIM
32 | asppv3_dim = cfg.KRCNN.GCE_HEAD.ASPPV3_DIM
33 | num_convs_before_asppv3 = cfg.KRCNN.GCE_HEAD.NUM_CONVS_BEFORE_ASPPV3
34 | asppv3_dilation = cfg.KRCNN.GCE_HEAD.ASPPV3_DILATION
35 | num_convs_after_asppv3 = cfg.KRCNN.GCE_HEAD.NUM_CONVS_AFTER_ASPPV3
36 |
37 | # convx before asppv3 module
38 | before_asppv3_list = []
39 | for _ in range(num_convs_before_asppv3):
40 | before_asppv3_list.append(
41 | make_conv(self.dim_in, conv_dim, kernel=3, stride=1, use_bn=use_bn, use_gn=use_gn, use_relu=True)
42 | )
43 | self.dim_in = conv_dim
44 | self.conv_before_asppv3 = nn.Sequential(*before_asppv3_list) if len(before_asppv3_list) else None
45 |
46 | # asppv3 module
47 | self.asppv3 = []
48 | self.asppv3.append(
49 | make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True)
50 | )
51 | for dilation in asppv3_dilation:
52 | self.asppv3.append(
53 | make_conv(self.dim_in, asppv3_dim, kernel=3, dilation=dilation, use_bn=use_bn, use_gn=use_gn,
54 | use_relu=True)
55 | )
56 | self.asppv3 = nn.ModuleList(self.asppv3)
57 | self.im_pool = nn.Sequential(
58 | nn.AdaptiveAvgPool2d(1),
59 | make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True)
60 | )
61 | self.dim_in = (len(asppv3_dilation) + 2) * asppv3_dim
62 |
63 | feat_list = []
64 | feat_list.append(
65 | make_conv(self.dim_in, conv_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True)
66 | )
67 | if use_nl:
68 | feat_list.append(
69 | NonLocal2d(conv_dim, int(conv_dim * cfg.KRCNN.GCE_HEAD.NL_RATIO), conv_dim, use_gn=True)
70 | )
71 | self.feat = nn.Sequential(*feat_list)
72 | self.dim_in = conv_dim
73 |
74 | # convx after asppv3 module
75 | assert num_convs_after_asppv3 >= 1
76 | after_asppv3_list = []
77 | for _ in range(num_convs_after_asppv3):
78 | after_asppv3_list.append(
79 | make_conv(self.dim_in, conv_dim, kernel=3, use_bn=use_bn, use_gn=use_gn, use_relu=True)
80 | )
81 | self.dim_in = conv_dim
82 | self.conv_after_asppv3 = nn.Sequential(*after_asppv3_list) if len(after_asppv3_list) else None
83 | self.dim_out = self.dim_in
84 |
85 | def forward(self, x, proposals):
86 | resolution = cfg.KRCNN.ROI_XFORM_RESOLUTION
87 | x = self.pooler(x, proposals)
88 |
89 | if self.conv_before_asppv3 is not None:
90 | x = self.conv_before_asppv3(x)
91 |
92 | asppv3_out = [interpolate(self.im_pool(x), scale_factor=resolution,
93 | mode="bilinear", align_corners=False)]
94 | for i in range(len(self.asppv3)):
95 | asppv3_out.append(self.asppv3[i](x))
96 | asppv3_out = torch.cat(asppv3_out, 1)
97 | asppv3_out = self.feat(asppv3_out)
98 |
99 | if self.conv_after_asppv3 is not None:
100 | x = self.conv_after_asppv3(asppv3_out)
101 | return x
102 |
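The middle part of roi_gce_head is essentially an ASPP-v3 block: an image-level pooling branch, a 1x1 conv branch and several dilated 3x3 branches run in parallel, are concatenated, and are fused by a 1x1 conv, which is why dim_in becomes (len(asppv3_dilation) + 2) * asppv3_dim before the fuse. A stripped-down sketch of that pattern with bare PyTorch layers (channel counts and dilations are illustrative, and make_conv/NonLocal2d are replaced by plain modules):

import torch
import torch.nn as nn
import torch.nn.functional as F

dim_in, aspp_dim, conv_dim, dilations = 256, 256, 512, (6, 12, 18)

branches = nn.ModuleList(
    [nn.Conv2d(dim_in, aspp_dim, 1)] +
    [nn.Conv2d(dim_in, aspp_dim, 3, padding=d, dilation=d) for d in dilations]
)
im_pool = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Conv2d(dim_in, aspp_dim, 1))
fuse = nn.Conv2d((len(dilations) + 2) * aspp_dim, conv_dim, 1)

x = torch.randn(2, dim_in, 32, 32)                    # pooled RoI features
pooled = F.interpolate(im_pool(x), size=x.shape[-2:], mode='bilinear',
                       align_corners=False)
out = torch.cat([pooled] + [F.relu(b(x)) for b in branches], dim=1)
out = fuse(out)                                       # 1x1 fuse after concatenation
print(out.shape)                                      # torch.Size([2, 512, 32, 32])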
--------------------------------------------------------------------------------
/rcnn/modeling/keypoint_rcnn/inference.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 | import torch
5 | from torch import nn
6 |
7 | from utils.data.structures.bounding_box import BoxList
8 | from utils.data.structures.keypoint import PersonKeypoints
9 |
10 |
11 | class KeypointPostProcessor(nn.Module):
12 | def __init__(self):
13 | super(KeypointPostProcessor, self).__init__()
14 |
15 | def forward(self, x, boxes):
16 | boxes_per_image = [len(box) for box in boxes]
17 | kpt_prob = x.split(boxes_per_image, dim=0)
18 |
19 | results = []
20 | for prob, box in zip(kpt_prob, boxes):
21 | bbox = BoxList(box.bbox, box.size, mode="xyxy")
22 | for field in box.fields():
23 | bbox.add_field(field, box.get_field(field))
24 | bbox.add_field("keypoints", prob.cpu().numpy())
25 | results.append(bbox)
26 |
27 | return results
28 |
29 |
30 | def keypoint_results(maps, rois):
31 | """Extract predicted keypoint locations from heatmaps. Output has shape
32 | (#rois, 4, #keypoints) with the 4 rows corresponding to (x, y, logit, prob)
33 | for each keypoint.
34 | """
35 | # This function converts a discrete image coordinate in a HEATMAP_SIZE x
36 | # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
37 | # consistency with keypoints_to_heatmap_labels by using the conversion from
38 | # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a
39 | # continuous coordinate.
40 | rois = rois.bbox.numpy()
41 |
42 | offset_x = rois[:, 0]
43 | offset_y = rois[:, 1]
44 |
45 | widths = rois[:, 2] - rois[:, 0]
46 | heights = rois[:, 3] - rois[:, 1]
47 | widths = np.maximum(widths, 1)
48 | heights = np.maximum(heights, 1)
49 | widths_ceil = np.ceil(widths)
50 | heights_ceil = np.ceil(heights)
51 |
52 | # NCHW to NHWC for use with OpenCV
53 | maps = np.transpose(maps, [0, 2, 3, 1])
54 | min_size = 0 # cfg.KRCNN.INFERENCE_MIN_SIZE
55 | num_keypoints = maps.shape[3]
56 | xy_preds = np.zeros((len(rois), 3, num_keypoints), dtype=np.float32)
57 | end_scores = np.zeros((len(rois), num_keypoints), dtype=np.float32)
58 | for i in range(len(rois)):
59 | if min_size > 0:
60 | roi_map_width = int(np.maximum(widths_ceil[i], min_size))
61 | roi_map_height = int(np.maximum(heights_ceil[i], min_size))
62 | else:
63 | roi_map_width = int(widths_ceil[i])  # cv2.resize expects integer sizes
64 | roi_map_height = int(heights_ceil[i])
65 | width_correction = widths[i] / roi_map_width
66 | height_correction = heights[i] / roi_map_height
67 | roi_map = cv2.resize(maps[i], (roi_map_width, roi_map_height), interpolation=cv2.INTER_CUBIC)
68 | # Bring back to CHW
69 | roi_map = np.transpose(roi_map, [2, 0, 1])
70 | # roi_map_probs = scores_to_probs(roi_map.copy())
71 | w = roi_map.shape[2]
72 | pos = roi_map.reshape(num_keypoints, -1).argmax(axis=1)
73 | x_int = pos % w
74 | y_int = (pos - x_int) // w
75 | # assert (roi_map_probs[k, y_int, x_int] == roi_map_probs[k, :, :].max())
76 | x = (x_int + 0.5) * width_correction
77 | y = (y_int + 0.5) * height_correction
78 | xy_preds[i, 0, :] = x + offset_x[i]
79 | xy_preds[i, 1, :] = y + offset_y[i]
80 | xy_preds[i, 2, :] = 1
81 | end_scores[i, :] = roi_map[np.arange(num_keypoints), y_int, x_int]
82 |
83 | xy = np.concatenate((xy_preds[:, :2, :], end_scores[:, np.newaxis, :]), axis=1)
84 |
85 | return np.transpose(xy_preds, [0, 2, 1]), xy
86 |
87 |
88 | def keypoint_post_processor():
89 | keypoint_post_processor = KeypointPostProcessor()
90 | return keypoint_post_processor
91 |
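keypoint_results decodes each heatmap channel by taking its argmax, converting the discrete cell index back to a continuous coordinate with the +0.5 (Heckbert) convention, and rescaling into image coordinates. A small numpy sketch of that decode for a single RoI, which skips the cv2 resize step and uses a made-up 56x56 heatmap and box:

import numpy as np

num_keypoints, heat = 17, 56
roi_map = np.random.rand(num_keypoints, heat, heat)    # per-keypoint heatmaps (CHW)
x0, y0, x1, y1 = 100.0, 40.0, 212.0, 264.0             # RoI in image coordinates

width_correction = (x1 - x0) / heat                    # heatmap cell -> image pixels
height_correction = (y1 - y0) / heat

pos = roi_map.reshape(num_keypoints, -1).argmax(axis=1)
x_int = pos % heat
y_int = (pos - x_int) // heat

# Heckbert convention: continuous coordinate = discrete coordinate + 0.5
x = (x_int + 0.5) * width_correction + x0
y = (y_int + 0.5) * height_correction + y0
scores = roi_map[np.arange(num_keypoints), y_int, x_int]
print(x.shape, y.shape, scores.shape)                  # (17,) (17,) (17,)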
--------------------------------------------------------------------------------
/rcnn/modeling/keypoint_rcnn/keypoint_rcnn.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from rcnn.modeling.keypoint_rcnn import heads
4 | from rcnn.modeling.keypoint_rcnn import outputs
5 | from rcnn.modeling.keypoint_rcnn.inference import keypoint_post_processor
6 | from rcnn.modeling.keypoint_rcnn.loss import keypoint_loss_evaluator
7 | from rcnn.modeling import registry
8 | from rcnn.core.config import cfg
9 |
10 |
11 | class KeypointRCNN(torch.nn.Module):
12 | def __init__(self, dim_in, spatial_scale):
13 | super(KeypointRCNN, self).__init__()
14 | if len(cfg.KRCNN.ROI_STRIDES) == 0:
15 | self.spatial_scale = spatial_scale
16 | else:
17 | self.spatial_scale = [1. / stride for stride in cfg.KRCNN.ROI_STRIDES]
18 |
19 | head = registry.ROI_KEYPOINT_HEADS[cfg.KRCNN.ROI_KEYPOINT_HEAD]
20 | self.Head = head(dim_in, self.spatial_scale)
21 | output = registry.ROI_KEYPOINT_OUTPUTS[cfg.KRCNN.ROI_KEYPOINT_OUTPUT]
22 | self.Output = output(self.Head.dim_out)
23 |
24 | self.post_processor = keypoint_post_processor()
25 | self.loss_evaluator = keypoint_loss_evaluator()
26 |
27 | def forward(self, conv_features, proposals, targets=None):
28 | if self.training:
29 | return self._forward_train(conv_features, proposals, targets)
30 | else:
31 | return self._forward_test(conv_features, proposals)
32 |
33 | def _forward_train(self, conv_features, proposals, targets=None):
34 | all_proposals = proposals
35 | with torch.no_grad():
36 | proposals = self.loss_evaluator.resample(proposals, targets)
37 |
38 | x = self.Head(conv_features, proposals)
39 | kp_logits = self.Output(x)
40 |
41 | loss_kp = self.loss_evaluator(kp_logits)
42 |
43 | return x, all_proposals, dict(loss_kp=loss_kp)
44 |
45 | def _forward_test(self, conv_features, proposals):
46 | x = self.Head(conv_features, proposals)
47 | kp_logits = self.Output(x)
48 |
49 | result = self.post_processor(kp_logits, proposals)
50 | return x, result, {}
51 |
--------------------------------------------------------------------------------
/rcnn/modeling/keypoint_rcnn/outputs.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from torch.nn import functional as F
3 |
4 | from rcnn.modeling import registry
5 | from rcnn.core.config import cfg
6 |
7 |
8 | @registry.ROI_KEYPOINT_OUTPUTS.register("keypoint_output")
9 | class Keypoint_output(nn.Module):
10 | def __init__(self, dim_in):
11 | super(Keypoint_output, self).__init__()
12 | num_keypoints = cfg.KRCNN.NUM_CLASSES
13 | assert cfg.KRCNN.RESOLUTION[0] // cfg.KRCNN.ROI_XFORM_RESOLUTION[0] == \
14 | cfg.KRCNN.RESOLUTION[1] // cfg.KRCNN.ROI_XFORM_RESOLUTION[1]
15 | self.up_scale = cfg.KRCNN.RESOLUTION[0] // (cfg.KRCNN.ROI_XFORM_RESOLUTION[0] * 2)
16 |
17 | deconv_kernel = 4
18 | self.kps_score_lowres = nn.ConvTranspose2d(
19 | dim_in,
20 | num_keypoints,
21 | deconv_kernel,
22 | stride=2,
23 | padding=deconv_kernel // 2 - 1,
24 | )
25 |
26 | nn.init.kaiming_normal_(self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu")
27 | nn.init.constant_(self.kps_score_lowres.bias, 0)
28 |
29 | self.dim_out = num_keypoints
30 |
31 | def forward(self, x):
32 | x = self.kps_score_lowres(x)
33 | if self.up_scale > 1:
34 | x = F.interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False)
35 |
36 | return x
37 |
--------------------------------------------------------------------------------
/rcnn/modeling/mask_rcnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/mask_rcnn/__init__.py
--------------------------------------------------------------------------------
/rcnn/modeling/mask_rcnn/heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .convx_heads import *
2 |
--------------------------------------------------------------------------------
/rcnn/modeling/mask_rcnn/heads/convx_heads.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from torch.nn import functional as F
3 |
4 | from models.imagenet.utils import convert_conv2convws_model
5 | from utils.net import make_conv
6 | from rcnn.utils.poolers import Pooler
7 | from rcnn.modeling import registry
8 | from rcnn.core.config import cfg
9 |
10 |
11 | @registry.ROI_MASK_HEADS.register("roi_convx_head")
12 | class roi_convx_head(nn.Module):
13 | """
14 | Stacked 3x3 conv head for the mask branch
15 | """
16 |
17 | def __init__(self, dim_in, spatial_scale):
18 | """
19 | Arguments:
20 | dim_in (list[int]): number of channels of the input feature maps
21 | spatial_scale (list[float]): scales of the input feature maps
22 | relative to the original image
23 | """
24 | super(roi_convx_head, self).__init__()
25 | self.dim_in = dim_in[-1]
26 |
27 | method = cfg.MRCNN.ROI_XFORM_METHOD
28 | resolution = cfg.MRCNN.ROI_XFORM_RESOLUTION
29 | sampling_ratio = cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO
30 | pooler = Pooler(
31 | method=method,
32 | output_size=resolution,
33 | scales=spatial_scale,
34 | sampling_ratio=sampling_ratio,
35 | )
36 | self.pooler = pooler
37 |
38 | use_lite = cfg.MRCNN.CONVX_HEAD.USE_LITE
39 | use_bn = cfg.MRCNN.CONVX_HEAD.USE_BN
40 | use_gn = cfg.MRCNN.CONVX_HEAD.USE_GN
41 | conv_dim = cfg.MRCNN.CONVX_HEAD.CONV_DIM
42 | num_stacked_convs = cfg.MRCNN.CONVX_HEAD.NUM_STACKED_CONVS
43 | dilation = cfg.MRCNN.CONVX_HEAD.DILATION
44 |
45 | self.blocks = []
46 | for layer_idx in range(num_stacked_convs):
47 | layer_name = "mask_fcn{}".format(layer_idx + 1)
48 | module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite,
49 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite)
50 | self.add_module(layer_name, module)
51 | self.dim_in = conv_dim
52 | self.blocks.append(layer_name)
53 | self.dim_out = self.dim_in
54 |
55 | if cfg.MRCNN.CONVX_HEAD.USE_WS:
56 | self = convert_conv2convws_model(self)
57 |
58 | for m in self.modules():
59 | if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
60 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity="relu")
61 | if m.bias is not None:
62 | nn.init.zeros_(m.bias)
63 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
64 | nn.init.constant_(m.weight, 1)
65 | nn.init.constant_(m.bias, 0)
66 |
67 | def forward(self, x, proposals):
68 | x = self.pooler(x, proposals)
69 | roi_feature = x
70 | for layer_name in self.blocks:
71 | x = F.relu(getattr(self, layer_name)(x))
72 |
73 | return x, roi_feature
74 |
75 |
--------------------------------------------------------------------------------
/rcnn/modeling/mask_rcnn/mask_rcnn.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from utils.data.structures.boxlist_ops import cat_boxlist
4 | from rcnn.modeling.mask_rcnn import heads
5 | from rcnn.modeling.mask_rcnn import outputs
6 | from rcnn.modeling.mask_rcnn.inference import mask_post_processor
7 | from rcnn.modeling.mask_rcnn.loss import mask_loss_evaluator
8 | from rcnn.modeling import registry
9 | from rcnn.core.config import cfg
10 |
11 |
12 | class MaskRCNN(torch.nn.Module):
13 | def __init__(self, dim_in, spatial_scale):
14 | super(MaskRCNN, self).__init__()
15 | if len(cfg.MRCNN.ROI_STRIDES) == 0:
16 | self.spatial_scale = spatial_scale
17 | else:
18 | self.spatial_scale = [1. / stride for stride in cfg.MRCNN.ROI_STRIDES]
19 |
20 | head = registry.ROI_MASK_HEADS[cfg.MRCNN.ROI_MASK_HEAD]
21 | self.Head = head(dim_in, self.spatial_scale)
22 | output = registry.ROI_MASK_OUTPUTS[cfg.MRCNN.ROI_MASK_OUTPUT]
23 | self.Output = output(self.Head.dim_out)
24 |
25 | self.post_processor = mask_post_processor()
26 | self.loss_evaluator = mask_loss_evaluator()
27 |
28 | def forward(self, conv_features, proposals, targets=None):
29 | """
30 | Arguments:
31 | conv_features (list[Tensor]): feature-maps from possibly several levels
32 | proposals (list[BoxList]): proposal boxes
33 | targets (list[BoxList], optional): the ground-truth targets.
34 |
35 | Returns:
36 | x (Tensor): the result of the feature extractor
37 | all_proposals (list[BoxList]): during training, the original proposals
38 | are returned. During testing, the predicted boxlists are returned
39 | with the `mask` field set
40 | losses (dict[Tensor]): During training, returns the losses for the
41 | head. During testing, returns an empty dict.
42 | """
43 | if self.training:
44 | return self._forward_train(conv_features, proposals, targets)
45 | else:
46 | return self._forward_test(conv_features, proposals)
47 |
48 | def _forward_train(self, conv_features, proposals, targets=None):
49 | # during training, only focus on positive boxes
50 | all_proposals = proposals
51 |
52 | with torch.no_grad():
53 | proposals = self.loss_evaluator.resample(proposals, targets)
54 |
55 | x, roi_feature = self.Head(conv_features, proposals)
56 | mask_logits = self.Output(x)
57 |
58 | loss_mask = self.loss_evaluator(mask_logits)
59 | return x, all_proposals, dict(loss_mask=loss_mask)
60 |
61 | def _forward_test(self, conv_features, proposals):
62 | x, roi_feature = self.Head(conv_features, proposals)
63 | mask_logits = self.Output(x)
64 |
65 | result = self.post_processor(mask_logits, proposals)
66 | return x, result, {}
67 |
--------------------------------------------------------------------------------
/rcnn/modeling/mask_rcnn/outputs.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from torch.nn import functional as F
3 |
4 | from rcnn.modeling import registry
5 | from rcnn.core.config import cfg
6 |
7 |
8 | @registry.ROI_MASK_OUTPUTS.register("mask_deconv_output")
9 | class Mask_deconv_output(nn.Module):
10 | def __init__(self, dim_in):
11 | super(Mask_deconv_output, self).__init__()
12 | num_classes = cfg.MODEL.NUM_CLASSES
13 |
14 | self.mask_deconv = nn.ConvTranspose2d(dim_in, dim_in, 2, 2, 0)
15 | self.mask_fcn_logits = nn.Conv2d(dim_in, num_classes, 1, 1, 0)
16 |
17 | # init
18 | nn.init.kaiming_normal_(self.mask_deconv.weight, mode='fan_out', nonlinearity="relu")
19 | if self.mask_deconv.bias is not None:
20 | nn.init.zeros_(self.mask_deconv.bias)
21 | nn.init.normal_(self.mask_fcn_logits.weight, std=0.001)
22 | if self.mask_fcn_logits.bias is not None:
23 | nn.init.constant_(self.mask_fcn_logits.bias, 0)
24 |
25 | def forward(self, x):
26 | x = F.relu(self.mask_deconv(x))
27 | return self.mask_fcn_logits(x)
28 |
29 |
30 | @registry.ROI_MASK_OUTPUTS.register("mask_logits_output")
31 | class Mask_logits_output(nn.Module):
32 | def __init__(self, dim_in):
33 | super(Mask_logits_output, self).__init__()
34 | num_classes = cfg.MODEL.NUM_CLASSES
35 |
36 | self.mask_fcn_logits = nn.Conv2d(dim_in, num_classes, 1, 1, 0)
37 |
38 | # init
39 | nn.init.normal_(self.mask_fcn_logits.weight, std=0.001)
40 | if self.mask_fcn_logits.bias is not None:
41 | nn.init.constant_(self.mask_fcn_logits.bias, 0)
42 |
43 | def forward(self, x):
44 | return self.mask_fcn_logits(x)
45 |
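Mask_deconv_output doubles the spatial resolution of the head features with a stride-2 ConvTranspose2d and then predicts one logit map per class with a 1x1 conv. A quick shape check with illustrative channel and class counts:

import torch
import torch.nn as nn
import torch.nn.functional as F

dim_in, num_classes = 256, 81
mask_deconv = nn.ConvTranspose2d(dim_in, dim_in, kernel_size=2, stride=2)
mask_logits = nn.Conv2d(dim_in, num_classes, kernel_size=1)

x = torch.randn(4, dim_in, 14, 14)        # head output for 4 RoIs
x = F.relu(mask_deconv(x))                # -> (4, 256, 28, 28)
print(mask_logits(x).shape)               # torch.Size([4, 81, 28, 28])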
--------------------------------------------------------------------------------
/rcnn/modeling/parsing_rcnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/parsing_rcnn/__init__.py
--------------------------------------------------------------------------------
/rcnn/modeling/parsing_rcnn/heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .convx_heads import *
2 | from .gce_heads import *
3 |
--------------------------------------------------------------------------------
/rcnn/modeling/parsing_rcnn/heads/convx_heads.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from torch.nn import functional as F
3 |
4 | from utils.net import make_conv
5 | from rcnn.utils.poolers import Pooler
6 | from rcnn.modeling import registry
7 | from rcnn.core.config import cfg
8 |
9 |
10 | @registry.ROI_PARSING_HEADS.register("roi_convx_head")
11 | class roi_convx_head(nn.Module):
12 | def __init__(self, dim_in, spatial_scale):
13 | super(roi_convx_head, self).__init__()
14 | self.dim_in = dim_in[-1]
15 |
16 | method = cfg.PRCNN.ROI_XFORM_METHOD
17 | resolution = cfg.PRCNN.ROI_XFORM_RESOLUTION
18 | sampling_ratio = cfg.PRCNN.ROI_XFORM_SAMPLING_RATIO
19 | pooler = Pooler(
20 | method=method,
21 | output_size=resolution,
22 | scales=spatial_scale,
23 | sampling_ratio=sampling_ratio,
24 | )
25 | self.pooler = pooler
26 |
27 | use_lite = cfg.PRCNN.CONVX_HEAD.USE_LITE
28 | use_bn = cfg.PRCNN.CONVX_HEAD.USE_BN
29 | use_gn = cfg.PRCNN.CONVX_HEAD.USE_GN
30 | conv_dim = cfg.PRCNN.CONVX_HEAD.CONV_DIM
31 | num_stacked_convs = cfg.PRCNN.CONVX_HEAD.NUM_STACKED_CONVS
32 | dilation = cfg.PRCNN.CONVX_HEAD.DILATION
33 |
34 | self.blocks = []
35 | for layer_idx in range(num_stacked_convs):
36 | layer_name = "parsing_fcn{}".format(layer_idx + 1)
37 | module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite,
38 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite)
39 | self.add_module(layer_name, module)
40 | self.dim_in = conv_dim
41 | self.blocks.append(layer_name)
42 | self.dim_out = self.dim_in
43 |
44 | def forward(self, x, proposals):
45 | x = self.pooler(x, proposals)
46 | roi_feature = x
47 | for layer_name in self.blocks:
48 | x = F.relu(getattr(self, layer_name)(x))
49 |
50 | return x, roi_feature
51 |
--------------------------------------------------------------------------------
/rcnn/modeling/parsing_rcnn/inference.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | from torch import nn
5 | from torch.nn import functional as F
6 |
7 | from utils.data.structures.bounding_box import BoxList
8 | from models.ops.misc import interpolate
9 | from rcnn.core.config import cfg
10 |
11 |
12 | # TODO check if want to return a single BoxList or a composite
13 | # object
14 | class ParsingPostProcessor(nn.Module):
15 | """
16 | From the parsing logits produced by the CNN, compute per-part
17 | probabilities by applying a softmax over the part classes
18 | (the logits are of fixed size and directly output by the CNN),
19 | split them per image, and attach them, together with the box
20 | scores, to the "parsing" and "parsing_scores" fields of each
21 | BoxList.
22 | """
23 |
24 | def __init__(self):
25 | super(ParsingPostProcessor, self).__init__()
26 |
27 | def forward(self, x, boxes):
28 | """
29 | Arguments:
30 | x (Tensor): the parsing logits
31 | boxes (list[BoxList]): bounding boxes that are used as
32 | reference, one for each image
33 | Returns:
34 | results (list[BoxList]): one BoxList for each image, containing
35 | the extra fields parsing and parsing_scores
36 | """
37 | parsing_prob = x
38 | parsing_prob = F.softmax(parsing_prob, dim=1)
39 |
40 | boxes_per_image = [len(box) for box in boxes]
41 | parsing_prob = parsing_prob.split(boxes_per_image, dim=0)
42 |
43 | results = []
44 | for prob, box in zip(parsing_prob, boxes):
45 | bbox = BoxList(box.bbox, box.size, mode="xyxy")
46 |
47 | for field in box.fields():
48 | bbox.add_field(field, box.get_field(field))
49 | bbox_scores = bbox.get_field("scores")
50 | bbox.add_field("parsing", prob.cpu().numpy())
51 | bbox.add_field("parsing_scores", bbox_scores.cpu().numpy())
52 | results.append(bbox)
53 |
54 | return results
55 |
56 |
57 | def expand_boxes(boxes, h, w):
58 | """Expand an array of boxes by a given scale."""
59 | w_half = (boxes[:, 2] - boxes[:, 0]) * .5
60 | h_half = (boxes[:, 3] - boxes[:, 1]) * .5
61 | x_c = (boxes[:, 2] + boxes[:, 0]) * .5
62 | y_c = (boxes[:, 3] + boxes[:, 1]) * .5
63 |
64 | h_scale = (h + 2.0) / h
65 | w_scale = (w + 2.0) / w
66 | w_half *= w_scale
67 | h_half *= h_scale
68 |
69 | boxes_exp = np.zeros(boxes.shape)
70 | boxes_exp[:, 0] = x_c - w_half
71 | boxes_exp[:, 2] = x_c + w_half
72 | boxes_exp[:, 1] = y_c - h_half
73 | boxes_exp[:, 3] = y_c + h_half
74 |
75 | return boxes_exp
76 |
77 |
78 | def parsing_results(parsings, boxes, semseg=None):
79 | im_w, im_h = boxes.size
80 | parsings = parsings.transpose((0, 2, 3, 1))
81 | boxes = boxes.bbox.numpy()
82 | H, W = parsings.shape[1:3]
83 | N = parsings.shape[3]
84 |
85 | boxes = expand_boxes(boxes, H, W)
86 | boxes = boxes.astype(np.int32)
87 | padded_parsing = np.zeros((H + 2, W + 2, N), dtype=np.float32)
88 |
89 | if semseg is not None:
90 | semseg = cv2.resize(semseg, (im_w, im_h), interpolation=cv2.INTER_LINEAR)
91 | else:
92 | semseg = np.zeros((im_h, im_w, N), dtype=np.float32)
93 |
94 | parsing_results = []
95 | for i in range(boxes.shape[0]):
96 | padded_parsing[1:-1, 1:-1] = parsings[i]
97 | box = boxes[i, :]
98 | w = box[2] - box[0] + 1
99 | h = box[3] - box[1] + 1
100 | w = np.maximum(w, 1)
101 | h = np.maximum(h, 1)
102 | parsing = cv2.resize(padded_parsing, (w, h), interpolation=cv2.INTER_LINEAR)
103 | parsing_idx = np.argmax(parsing, axis=2)
104 | im_parsing = np.zeros((im_h, im_w), dtype=np.uint8)
105 | x_0 = max(box[0], 0)
106 | x_1 = min(box[2] + 1, im_w)
107 | y_0 = max(box[1], 0)
108 | y_1 = min(box[3] + 1, im_h)
109 |
110 | mask = np.where(parsing_idx >= 1, 1, 0)
111 | mask = mask[:, :, np.newaxis].repeat(N, axis=2)
112 | cropped_semseg = semseg[y_0:y_1, x_0:x_1] * mask[(y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])]
113 |
114 | parsing[(y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])] += \
115 | cropped_semseg * cfg.PRCNN.SEMSEG_FUSE_WEIGHT
116 | parsing = np.argmax(parsing, axis=2)
117 |
118 | im_parsing[y_0:y_1, x_0:x_1] = parsing[(y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])]
119 | parsing_results.append(im_parsing)
120 | return parsing_results
121 |
122 |
123 | def parsing_post_processor():
124 | parsing_post_processor = ParsingPostProcessor()
125 | return parsing_post_processor
126 |
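expand_boxes grows every box by (h + 2) / h vertically and (w + 2) / w horizontally, so that the one-pixel border added around padded_parsing still maps onto the image consistently. A tiny numpy check of that scaling with a made-up box (the function here is a re-typed copy only so the snippet is runnable on its own):

import numpy as np

def expand_boxes(boxes, h, w):
    """Expand boxes by (h + 2) / h vertically and (w + 2) / w horizontally."""
    w_half = (boxes[:, 2] - boxes[:, 0]) * .5 * (w + 2.0) / w
    h_half = (boxes[:, 3] - boxes[:, 1]) * .5 * (h + 2.0) / h
    x_c = (boxes[:, 2] + boxes[:, 0]) * .5
    y_c = (boxes[:, 3] + boxes[:, 1]) * .5
    return np.stack([x_c - w_half, y_c - h_half, x_c + w_half, y_c + h_half], axis=1)

boxes = np.array([[10., 20., 110., 220.]])             # a 100 x 200 box (xyxy)
print(expand_boxes(boxes, h=56, w=56))                 # each half-extent grows by 2/56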
--------------------------------------------------------------------------------
/rcnn/modeling/parsing_rcnn/outputs.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from torch.nn import functional as F
3 |
4 | from rcnn.modeling import registry
5 | from rcnn.core.config import cfg
6 |
7 |
8 | @registry.ROI_PARSING_OUTPUTS.register("parsing_output")
9 | class Parsing_output(nn.Module):
10 | def __init__(self, dim_in):
11 | super(Parsing_output, self).__init__()
12 | num_parsing = cfg.PRCNN.NUM_PARSING
13 | assert cfg.PRCNN.RESOLUTION[0] // cfg.PRCNN.ROI_XFORM_RESOLUTION[0] == \
14 | cfg.PRCNN.RESOLUTION[1] // cfg.PRCNN.ROI_XFORM_RESOLUTION[1]
15 | self.up_scale = cfg.PRCNN.RESOLUTION[0] // (cfg.PRCNN.ROI_XFORM_RESOLUTION[0] * 2)
16 |
17 | deconv_kernel = 4
18 | self.parsing_score_lowres = nn.ConvTranspose2d(
19 | dim_in,
20 | num_parsing,
21 | deconv_kernel,
22 | stride=2,
23 | padding=deconv_kernel // 2 - 1,
24 | )
25 |
26 | nn.init.kaiming_normal_(self.parsing_score_lowres.weight, mode="fan_out", nonlinearity="relu")
27 | nn.init.constant_(self.parsing_score_lowres.bias, 0)
28 |
29 | self.dim_out = num_parsing
30 |
31 | def forward(self, x):
32 | x = self.parsing_score_lowres(x)
33 | if self.up_scale > 1:
34 | x = F.interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False)
35 |
36 | return x
37 |
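Parsing_output upsamples the RoI features by 2x with the 4x4 stride-2 deconv and then, when the configured RESOLUTION is more than twice the RoI pooling resolution, interpolates by the remaining up_scale factor. A worked shape example with illustrative sizes (32x32 RoI features to a 128x128 parsing map, 20 part classes):

import torch
import torch.nn as nn
import torch.nn.functional as F

num_parsing, dim_in = 20, 512
roi_resolution, out_resolution = (32, 32), (128, 128)
up_scale = out_resolution[0] // (roi_resolution[0] * 2)   # 128 // 64 = 2

score_lowres = nn.ConvTranspose2d(dim_in, num_parsing, kernel_size=4,
                                  stride=2, padding=1)    # padding = 4 // 2 - 1

x = torch.randn(3, dim_in, *roi_resolution)
x = score_lowres(x)                                       # -> (3, 20, 64, 64)
if up_scale > 1:
    x = F.interpolate(x, scale_factor=up_scale, mode="bilinear", align_corners=False)
print(x.shape)                                            # torch.Size([3, 20, 128, 128])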
--------------------------------------------------------------------------------
/rcnn/modeling/parsing_rcnn/parsing_rcnn.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from rcnn.modeling.parsing_rcnn import heads
4 | from rcnn.modeling.parsing_rcnn import outputs
5 | from rcnn.modeling.parsing_rcnn.inference import parsing_post_processor
6 | from rcnn.modeling.parsing_rcnn.loss import parsing_loss_evaluator
7 | from rcnn.modeling import registry
8 | from rcnn.core.config import cfg
9 |
10 |
11 | class ParsingRCNN(torch.nn.Module):
12 | def __init__(self, dim_in, spatial_scale):
13 | super(ParsingRCNN, self).__init__()
14 | if len(cfg.PRCNN.ROI_STRIDES) == 0:
15 | self.spatial_scale = spatial_scale
16 | else:
17 | self.spatial_scale = [1. / stride for stride in cfg.PRCNN.ROI_STRIDES]
18 |
19 | head = registry.ROI_PARSING_HEADS[cfg.PRCNN.ROI_PARSING_HEAD]
20 | self.Head = head(dim_in, self.spatial_scale)
21 | output = registry.ROI_PARSING_OUTPUTS[cfg.PRCNN.ROI_PARSING_OUTPUT]
22 | self.Output = output(self.Head.dim_out)
23 |
24 | self.post_processor = parsing_post_processor()
25 | self.loss_evaluator = parsing_loss_evaluator()
26 |
27 | def forward(self, conv_features, proposals, targets=None):
28 | """
29 | Arguments:
30 | conv_features (list[Tensor]): feature-maps from possibly several levels
31 | proposals (list[BoxList]): proposal boxes
32 | targets (list[BoxList], optional): the ground-truth targets.
33 | Returns:
34 | x (Tensor): the result of the feature extractor
35 | all_proposals (list[BoxList]): during training, the original proposals
36 | are returned. During testing, the predicted boxlists are returned
37 | with the `parsing` field set
38 | losses (dict[Tensor]): During training, returns the losses for the
39 | head. During testing, returns an empty dict.
40 | """
41 | if self.training:
42 | return self._forward_train(conv_features, proposals, targets)
43 | else:
44 | return self._forward_test(conv_features, proposals)
45 |
46 | def _forward_train(self, conv_features, proposals, targets=None):
47 | all_proposals = proposals
48 | with torch.no_grad():
49 | proposals = self.loss_evaluator.resample(proposals, targets)
50 |
51 | x, roi_feature = self.Head(conv_features, proposals)
52 | parsing_logits = self.Output(x)
53 |
54 | loss_parsing = self.loss_evaluator(parsing_logits)
55 | return x, all_proposals, dict(loss_parsing=loss_parsing)
56 |
57 | def _forward_test(self, conv_features, proposals):
58 | x, roi_feature = self.Head(conv_features, proposals)
59 | parsing_logits = self.Output(x)
60 |
61 | result = self.post_processor(parsing_logits, proposals)
62 | return x, result, {}
63 |
--------------------------------------------------------------------------------
/rcnn/modeling/registry.py:
--------------------------------------------------------------------------------
1 | from utils.registry import Registry
2 |
3 |
4 | """
5 | Feature Extractor.
6 | """
7 | # Backbone
8 | BACKBONES = Registry()
9 |
10 | # FPN
11 | FPN_BODY = Registry()
12 |
13 |
14 | """
15 | ROI Head.
16 | """
17 | # Box Head
18 | ROI_CLS_HEADS = Registry()
19 | ROI_CLS_OUTPUTS = Registry()
20 | ROI_BOX_HEADS = Registry()
21 | ROI_BOX_OUTPUTS = Registry()
22 |
23 | # Cascade Head
24 | ROI_CASCADE_HEADS = Registry()
25 | ROI_CASCADE_OUTPUTS = Registry()
26 |
27 | # Mask Head
28 | ROI_MASK_HEADS = Registry()
29 | ROI_MASK_OUTPUTS = Registry()
30 |
31 | # Keypoint Head
32 | ROI_KEYPOINT_HEADS = Registry()
33 | ROI_KEYPOINT_OUTPUTS = Registry()
34 |
35 | # Parsing Head
36 | ROI_PARSING_HEADS = Registry()
37 | ROI_PARSING_OUTPUTS = Registry()
38 |
39 | # UV Head
40 | ROI_UV_HEADS = Registry()
41 | ROI_UV_OUTPUTS = Registry()
42 |
43 |
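Each Registry above is consumed by the model builder as a plain name-to-class lookup, e.g. registry.ROI_PARSING_HEADS[cfg.PRCNN.ROI_PARSING_HEAD] in parsing_rcnn.py. The register-then-lookup pattern can be sketched with a hypothetical dict-based Registry (this is not the implementation in utils/registry.py, just an illustration of the contract):

class Registry(dict):
    """Hypothetical minimal registry: decorate a class to store it by name."""
    def register(self, name):
        def wrapper(cls):
            self[name] = cls
            return cls
        return wrapper

ROI_PARSING_HEADS = Registry()

@ROI_PARSING_HEADS.register("roi_convx_head")
class roi_convx_head:
    pass

# model_builder-style lookup driven by a config string
head_cls = ROI_PARSING_HEADS["roi_convx_head"]
print(head_cls.__name__)        # roi_convx_head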
--------------------------------------------------------------------------------
/rcnn/modeling/rpn/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rcnn/modeling/uv_rcnn/heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .convx_heads import *
2 | from .gce_heads import *
3 |
--------------------------------------------------------------------------------
/rcnn/modeling/uv_rcnn/heads/convx_heads.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from torch.nn import functional as F
3 |
4 | from utils.net import make_conv
5 | from rcnn.utils.poolers import Pooler
6 | from rcnn.modeling import registry
7 | from rcnn.core.config import cfg
8 |
9 |
10 | @registry.ROI_UV_HEADS.register("roi_convx_head")
11 | class roi_convx_head(nn.Module):
12 | def __init__(self, dim_in, spatial_scale):
13 | super(roi_convx_head, self).__init__()
14 | self.dim_in = dim_in[-1]
15 |
16 | method = cfg.UVRCNN.ROI_XFORM_METHOD
17 | resolution = cfg.UVRCNN.ROI_XFORM_RESOLUTION
18 | sampling_ratio = cfg.UVRCNN.ROI_XFORM_SAMPLING_RATIO
19 | pooler = Pooler(
20 | method=method,
21 | output_size=resolution,
22 | scales=spatial_scale,
23 | sampling_ratio=sampling_ratio,
24 | )
25 | self.pooler = pooler
26 |
27 | use_lite = cfg.UVRCNN.CONVX_HEAD.USE_LITE
28 | use_bn = cfg.UVRCNN.CONVX_HEAD.USE_BN
29 | use_gn = cfg.UVRCNN.CONVX_HEAD.USE_GN
30 | conv_dim = cfg.UVRCNN.CONVX_HEAD.CONV_DIM
31 | num_stacked_convs = cfg.UVRCNN.CONVX_HEAD.NUM_STACKED_CONVS
32 | dilation = cfg.UVRCNN.CONVX_HEAD.DILATION
33 |
34 | self.blocks = []
35 | for layer_idx in range(num_stacked_convs):
36 | layer_name = "UV_fcn{}".format(layer_idx + 1)
37 | module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite,
38 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite)
39 | self.add_module(layer_name, module)
40 | self.dim_in = conv_dim
41 | self.blocks.append(layer_name)
42 | self.dim_out = self.dim_in
43 |
44 | def forward(self, x, proposals):
45 | x = self.pooler(x, proposals)
46 |
47 | for layer_name in self.blocks:
48 | x = F.relu(getattr(self, layer_name)(x))
49 |
50 | return x
51 |
--------------------------------------------------------------------------------
/rcnn/modeling/uv_rcnn/heads/gce_heads.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | from models.ops import interpolate, NonLocal2d
5 | from rcnn.core.config import cfg
6 | from rcnn.modeling import registry
7 | from rcnn.utils.poolers import Pooler
8 | from utils.net import make_conv
9 |
10 |
11 | @registry.ROI_UV_HEADS.register("roi_gce_head")
12 | class roi_gce_head(nn.Module):
13 | def __init__(self, dim_in, spatial_scale):
14 | super(roi_gce_head, self).__init__()
15 | self.dim_in = dim_in[-1]
16 |
17 | method = cfg.UVRCNN.ROI_XFORM_METHOD
18 | resolution = cfg.UVRCNN.ROI_XFORM_RESOLUTION
19 | sampling_ratio = cfg.UVRCNN.ROI_XFORM_SAMPLING_RATIO
20 | pooler = Pooler(
21 | method=method,
22 | output_size=resolution,
23 | scales=spatial_scale,
24 | sampling_ratio=sampling_ratio,
25 | )
26 | self.pooler = pooler
27 |
28 | use_nl = cfg.UVRCNN.GCE_HEAD.USE_NL
29 | use_bn = cfg.UVRCNN.GCE_HEAD.USE_BN
30 | use_gn = cfg.UVRCNN.GCE_HEAD.USE_GN
31 | conv_dim = cfg.UVRCNN.GCE_HEAD.CONV_DIM
32 | asppv3_dim = cfg.UVRCNN.GCE_HEAD.ASPPV3_DIM
33 | num_convs_before_asppv3 = cfg.UVRCNN.GCE_HEAD.NUM_CONVS_BEFORE_ASPPV3
34 | asppv3_dilation = cfg.UVRCNN.GCE_HEAD.ASPPV3_DILATION
35 | num_convs_after_asppv3 = cfg.UVRCNN.GCE_HEAD.NUM_CONVS_AFTER_ASPPV3
36 |
37 | # convx before asppv3 module
38 | before_asppv3_list = []
39 | for _ in range(num_convs_before_asppv3):
40 | before_asppv3_list.append(
41 | make_conv(self.dim_in, conv_dim, kernel=3, stride=1, use_bn=use_bn, use_gn=use_gn, use_relu=True)
42 | )
43 | self.dim_in = conv_dim
44 | self.conv_before_asppv3 = nn.Sequential(*before_asppv3_list) if len(before_asppv3_list) else None
45 |
46 | # asppv3 module
47 | self.asppv3 = []
48 | self.asppv3.append(
49 | make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True)
50 | )
51 | for dilation in asppv3_dilation:
52 | self.asppv3.append(
53 | make_conv(self.dim_in, asppv3_dim, kernel=3, dilation=dilation, use_bn=use_bn, use_gn=use_gn,
54 | use_relu=True)
55 | )
56 | self.asppv3 = nn.ModuleList(self.asppv3)
57 | self.im_pool = nn.Sequential(
58 | nn.AdaptiveAvgPool2d(1),
59 | make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True)
60 | )
61 | self.dim_in = (len(asppv3_dilation) + 2) * asppv3_dim
62 |
63 | feat_list = []
64 | feat_list.append(
65 | make_conv(self.dim_in, conv_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True)
66 | )
67 | if use_nl:
68 | feat_list.append(
69 | NonLocal2d(conv_dim, int(conv_dim * cfg.UVRCNN.GCE_HEAD.NL_RATIO), conv_dim, use_gn=True)
70 | )
71 | self.feat = nn.Sequential(*feat_list)
72 | self.dim_in = conv_dim
73 |
74 | # convx after asppv3 module
75 | assert num_convs_after_asppv3 >= 1
76 | after_asppv3_list = []
77 | for _ in range(num_convs_after_asppv3):
78 | after_asppv3_list.append(
79 | make_conv(self.dim_in, conv_dim, kernel=3, use_bn=use_bn, use_gn=use_gn, use_relu=True)
80 | )
81 | self.dim_in = conv_dim
82 | self.conv_after_asppv3 = nn.Sequential(*after_asppv3_list) if len(after_asppv3_list) else None
83 | self.dim_out = self.dim_in
84 |
85 | def forward(self, x, proposals):
86 | resolution = cfg.UVRCNN.ROI_XFORM_RESOLUTION
87 | x = self.pooler(x, proposals)
88 |
89 | if self.conv_before_asppv3 is not None:
90 | x = self.conv_before_asppv3(x)
91 |
92 | asppv3_out = [interpolate(self.im_pool(x), scale_factor=resolution,
93 | mode="bilinear", align_corners=False)]
94 | for i in range(len(self.asppv3)):
95 | asppv3_out.append(self.asppv3[i](x))
96 | asppv3_out = torch.cat(asppv3_out, 1)
97 | asppv3_out = self.feat(asppv3_out)
98 |
99 | if self.conv_after_asppv3 is not None:
100 | x = self.conv_after_asppv3(asppv3_out)
101 | return x
102 |
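Not part of the repository: the ASPP-v3 block above concatenates an image-level pooling branch, a 1x1 branch and one dilated 3x3 branch per rate, which is where the (len(asppv3_dilation) + 2) * asppv3_dim channel count comes from. A minimal, self-contained sketch of that fusion pattern, with plain nn.Conv2d standing in for make_conv and made-up dimensions (not the repo's config defaults):

    import torch
    from torch import nn
    from torch.nn import functional as F

    dim_in, aspp_dim, dilations, resolution = 256, 128, (6, 12, 18), 14
    branches = nn.ModuleList(
        [nn.Conv2d(dim_in, aspp_dim, 1)]
        + [nn.Conv2d(dim_in, aspp_dim, 3, padding=d, dilation=d) for d in dilations]
    )
    im_pool = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Conv2d(dim_in, aspp_dim, 1))

    x = torch.randn(2, dim_in, resolution, resolution)
    outs = [F.interpolate(im_pool(x), size=resolution, mode="bilinear", align_corners=False)]
    outs += [branch(x) for branch in branches]
    fused = torch.cat(outs, dim=1)
    assert fused.shape[1] == (len(dilations) + 2) * aspp_dim   # 5 * 128 = 640 channels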
--------------------------------------------------------------------------------
/rcnn/modeling/uv_rcnn/inference.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | import torch
5 | from torch import nn
6 |
7 | from utils.data.structures.bounding_box import BoxList
8 | from rcnn.core.config import cfg
9 |
10 |
11 | # TODO check if want to return a single BoxList or a composite
12 | # object
13 | class UVPostProcessor(nn.Module):
14 |     """
15 |     From the results of the CNN, post-process the UV predictions
16 |     (the AnnIndex, Index_UV, U and V maps, which are of fixed size and
17 |     directly output by the CNN) by splitting them per image and storing
18 |     them in the "uv" field of each BoxList.
19 | 
20 |     Projecting the maps into the image according to the box locations
21 |     is handled separately by uv_results.
22 |     """
23 |
24 | def __init__(self):
25 | super(UVPostProcessor, self).__init__()
26 |
27 | def forward(self, uv_logits, boxes):
28 | """
29 | Arguments:
30 | uv_logits (List): the uv logits
31 | boxes (list[BoxList]): bounding boxes that are used as
32 |                 reference, one for each image
33 |
34 | Returns:
35 | results (list[BoxList]): one BoxList for each image, containing
36 |                 the extra field uv
37 | """
38 | UV_pred_Ann, UV_pred_Index, UV_pred_U, UV_pred_V = uv_logits
39 |
40 | boxes_per_image = [len(box) for box in boxes]
41 | UV_pred_Ann = UV_pred_Ann.split(boxes_per_image, dim=0)
42 | UV_pred_Index = UV_pred_Index.split(boxes_per_image, dim=0)
43 | UV_pred_U = UV_pred_U.split(boxes_per_image, dim=0)
44 | UV_pred_V = UV_pred_V.split(boxes_per_image, dim=0)
45 |
46 | results = []
47 | for Ann, Index, U, V, box in zip(UV_pred_Ann, UV_pred_Index, UV_pred_U, UV_pred_V, boxes):
48 | bbox = BoxList(box.bbox, box.size, mode="xyxy")
49 | for field in box.fields():
50 | bbox.add_field(field, box.get_field(field))
51 | bbox.add_field("uv", [Ann.cpu().numpy(), Index.cpu().numpy(), U.cpu().numpy(), V.cpu().numpy()])
52 | results.append(bbox)
53 |
54 | return results
55 |
56 |
57 | def uv_results(uv_logits, boxes):
58 | AnnIndex, Index_UV, U_uv, V_uv = uv_logits
59 | K = cfg.UVRCNN.NUM_PATCHES + 1
60 | boxes = boxes.bbox.numpy()
61 | uvs_results = []
62 | for ind, entry in enumerate(boxes):
63 | # Compute ref box width and height
64 | bx = max(entry[2] - entry[0], 1)
65 | by = max(entry[3] - entry[1], 1)
66 |
67 | # preds[ind] axes are CHW; bring p axes to WHC
68 | CurAnnIndex = np.swapaxes(AnnIndex[ind], 0, 2)
69 | CurIndex_UV = np.swapaxes(Index_UV[ind], 0, 2)
70 | CurU_uv = np.swapaxes(U_uv[ind], 0, 2)
71 | CurV_uv = np.swapaxes(V_uv[ind], 0, 2)
72 |
73 | # Resize p from (HEATMAP_SIZE, HEATMAP_SIZE, c) to (int(bx), int(by), c)
74 |         CurAnnIndex = cv2.resize(CurAnnIndex, (int(by), int(bx)))
75 |         CurIndex_UV = cv2.resize(CurIndex_UV, (int(by), int(bx)))
76 |         CurU_uv = cv2.resize(CurU_uv, (int(by), int(bx)))
77 |         CurV_uv = cv2.resize(CurV_uv, (int(by), int(bx)))
78 |
79 | # Bring Cur_Preds axes back to CHW
80 | CurAnnIndex = np.swapaxes(CurAnnIndex, 0, 2)
81 | CurIndex_UV = np.swapaxes(CurIndex_UV, 0, 2)
82 | CurU_uv = np.swapaxes(CurU_uv, 0, 2)
83 | CurV_uv = np.swapaxes(CurV_uv, 0, 2)
84 |
85 | # Removed squeeze calls due to singleton dimension issues
86 | CurAnnIndex = np.argmax(CurAnnIndex, axis=0)
87 | CurIndex_UV = np.argmax(CurIndex_UV, axis=0)
88 | CurIndex_UV = CurIndex_UV * (CurAnnIndex>0).astype(np.float32)
89 |
90 | output = np.zeros([3, int(by), int(bx)], dtype=np.float32)
91 | output[0] = CurIndex_UV
92 |
93 | for part_id in range(1, K):
94 | CurrentU = CurU_uv[part_id]
95 | CurrentV = CurV_uv[part_id]
96 | output[1, CurIndex_UV==part_id] = CurrentU[CurIndex_UV==part_id]
97 | output[2, CurIndex_UV==part_id] = CurrentV[CurIndex_UV==part_id]
98 | uvs_results.append(output)
99 | return uvs_results
100 |
101 |
102 | def uv_post_processor():
103 | uv_post_processor = UVPostProcessor()
104 | return uv_post_processor
105 |
--------------------------------------------------------------------------------
/rcnn/modeling/uv_rcnn/outputs.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from torch.nn import functional as F
3 |
4 | from rcnn.modeling import registry
5 | from rcnn.core.config import cfg
6 |
7 |
8 | @registry.ROI_UV_OUTPUTS.register("uv_output")
9 | class UV_output(nn.Module):
10 | def __init__(self, dim_in):
11 | super(UV_output, self).__init__()
12 | num_patches = cfg.UVRCNN.NUM_PATCHES
13 | deconv_kernel = 4
14 | assert cfg.UVRCNN.RESOLUTION[0] // cfg.UVRCNN.ROI_XFORM_RESOLUTION[0] == \
15 | cfg.UVRCNN.RESOLUTION[1] // cfg.UVRCNN.ROI_XFORM_RESOLUTION[1]
16 | self.up_scale = cfg.UVRCNN.RESOLUTION[0] // (cfg.UVRCNN.ROI_XFORM_RESOLUTION[0] * 2)
17 |
18 | self.deconv_Ann = nn.ConvTranspose2d(dim_in, 15, deconv_kernel, 2, padding=deconv_kernel // 2 - 1)
19 | self.deconv_Index = nn.ConvTranspose2d(dim_in, num_patches + 1, deconv_kernel, 2,
20 | padding=deconv_kernel // 2 - 1)
21 | self.deconv_U = nn.ConvTranspose2d(dim_in, num_patches + 1, deconv_kernel, 2, padding=deconv_kernel // 2 - 1)
22 | self.deconv_V = nn.ConvTranspose2d(dim_in, num_patches + 1, deconv_kernel, 2, padding=deconv_kernel // 2 - 1)
23 |
24 | for m in self.modules():
25 | if isinstance(m, nn.ConvTranspose2d):
26 | nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
27 | nn.init.constant_(m.bias, 0)
28 |
29 | def forward(self, x):
30 | x_Ann = self.deconv_Ann(x)
31 | x_Index = self.deconv_Index(x)
32 | x_U = self.deconv_U(x)
33 | x_V = self.deconv_V(x)
34 |
35 | if self.up_scale > 1:
36 | x_Ann = F.interpolate(x_Ann, scale_factor=self.up_scale, mode="bilinear", align_corners=False)
37 | x_Index = F.interpolate(x_Index, scale_factor=self.up_scale, mode="bilinear", align_corners=False)
38 | x_U = F.interpolate(x_U, scale_factor=self.up_scale, mode="bilinear", align_corners=False)
39 | x_V = F.interpolate(x_V, scale_factor=self.up_scale, mode="bilinear", align_corners=False)
40 |
41 | return [x_Ann, x_Index, x_U, x_V]
42 |
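Not part of the repository: each transposed convolution above (kernel 4, stride 2, padding 1) doubles the spatial size of the ROI features, and the optional interpolation covers the remaining factor, which is why up_scale divides RESOLUTION[0] by ROI_XFORM_RESOLUTION[0] * 2. A small sketch of that bookkeeping with assumed sizes (14x14 ROI features up to 56x56 UV maps; not taken from any shipped config):

    import torch
    from torch import nn
    from torch.nn import functional as F

    roi_res, target_res, dim_in, num_out = 14, 56, 256, 25
    deconv = nn.ConvTranspose2d(dim_in, num_out, 4, 2, padding=1)
    up_scale = target_res // (roi_res * 2)                     # deconv already doubles 14 -> 28

    y = deconv(torch.randn(2, dim_in, roi_res, roi_res))       # (2, 25, 28, 28)
    if up_scale > 1:
        y = F.interpolate(y, scale_factor=up_scale, mode="bilinear", align_corners=False)
    assert y.shape[-2:] == (target_res, target_res)            # (56, 56)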
--------------------------------------------------------------------------------
/rcnn/modeling/uv_rcnn/uv_rcnn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | from rcnn.modeling.uv_rcnn import heads
5 | from rcnn.modeling.uv_rcnn import outputs
6 | from rcnn.modeling.uv_rcnn.inference import uv_post_processor
7 | from rcnn.modeling.uv_rcnn.loss import uv_loss_evaluator
8 | from rcnn.modeling import registry
9 | from rcnn.core.config import cfg
10 |
11 |
12 | class UVRCNN(torch.nn.Module):
13 | def __init__(self, dim_in, spatial_scale):
14 | super(UVRCNN, self).__init__()
15 | if len(cfg.UVRCNN.ROI_STRIDES) == 0:
16 | self.spatial_scale = spatial_scale
17 | else:
18 | self.spatial_scale = [1. / stride for stride in cfg.UVRCNN.ROI_STRIDES]
19 | # self.roi_batch_size = cfg.UVRCNN.ROI_BATCH_SIZE # TODO
20 |
21 | head = registry.ROI_UV_HEADS[cfg.UVRCNN.ROI_UV_HEAD]
22 | self.Head = head(dim_in, self.spatial_scale)
23 | output = registry.ROI_UV_OUTPUTS[cfg.UVRCNN.ROI_UV_OUTPUT]
24 | self.Output = output(self.Head.dim_out)
25 |
26 | self.post_processor = uv_post_processor()
27 | self.loss_evaluator = uv_loss_evaluator()
28 |
29 | def forward(self, conv_features, proposals, targets=None):
30 | if self.training:
31 | return self._forward_train(conv_features, proposals, targets)
32 | else:
33 | return self._forward_test(conv_features, proposals)
34 |
35 | def _forward_train(self, conv_features, proposals, targets=None):
36 | all_proposals = proposals
37 | with torch.no_grad():
38 | proposals = self.loss_evaluator.resample(proposals, targets)
39 |
40 | x = self.Head(conv_features, proposals)
41 | uv_logits = self.Output(x)
42 |
43 | loss_Upoints, loss_Vpoints, loss_seg_AnnIndex, loss_IndexUVPoints = self.loss_evaluator(uv_logits)
44 | loss_dict = dict(loss_Upoints=loss_Upoints, loss_Vpoints=loss_Vpoints,
45 | loss_seg_Ann=loss_seg_AnnIndex, loss_IPoints=loss_IndexUVPoints)
46 |
47 | return x, all_proposals, loss_dict
48 |
49 | def _forward_test(self, conv_features, proposals):
50 | x = self.Head(conv_features, proposals)
51 | uv_logits = self.Output(x)
52 |
53 | result = self.post_processor(uv_logits, proposals)
54 | return x, result, {}
55 |
--------------------------------------------------------------------------------
/rcnn/ops/__init__.py:
--------------------------------------------------------------------------------
1 | from .roi_align import ROIAlign
2 | from .roi_align import roi_align
3 | from .roi_pool import ROIPool
4 | from .roi_pool import roi_pool
5 | from .deform_pool import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack
6 | from .deform_pool import deform_roi_pooling
7 |
--------------------------------------------------------------------------------
/rcnn/ops/roi_align.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.autograd import Function
4 | from torch.autograd.function import once_differentiable
5 | from torch.nn.modules.utils import _pair
6 |
7 | from models.ops import _C
8 |
9 | from apex import amp
10 |
11 |
12 | class _ROIAlign(Function):
13 | @staticmethod
14 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned):
15 | ctx.save_for_backward(roi)
16 | ctx.output_size = _pair(output_size)
17 | ctx.spatial_scale = spatial_scale
18 | ctx.sampling_ratio = sampling_ratio
19 | ctx.input_shape = input.size()
20 | ctx.aligned = aligned
21 | output = _C.roi_align_forward(
22 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned
23 | )
24 | return output
25 |
26 | @staticmethod
27 | @once_differentiable
28 | def backward(ctx, grad_output):
29 | rois, = ctx.saved_tensors
30 | output_size = ctx.output_size
31 | spatial_scale = ctx.spatial_scale
32 | sampling_ratio = ctx.sampling_ratio
33 | bs, ch, h, w = ctx.input_shape
34 | grad_input = _C.roi_align_backward(
35 | grad_output, rois, spatial_scale, output_size[0], output_size[1], bs, ch, h, w, sampling_ratio, ctx.aligned
36 | )
37 | return grad_input, None, None, None, None, None
38 |
39 |
40 | roi_align = _ROIAlign.apply
41 |
42 |
43 | class ROIAlign(nn.Module):
44 | def __init__(self, output_size, spatial_scale, sampling_ratio, aligned):
45 | super(ROIAlign, self).__init__()
46 | self.output_size = output_size
47 | self.spatial_scale = spatial_scale
48 | self.sampling_ratio = sampling_ratio
49 | self.aligned = aligned
50 |
51 | @amp.float_function
52 | def forward(self, input, rois):
53 | return roi_align(
54 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned
55 | )
56 |
57 | def __repr__(self):
58 | tmpstr = self.__class__.__name__ + "("
59 | tmpstr += "output_size=" + str(self.output_size)
60 | tmpstr += ", spatial_scale=" + str(self.spatial_scale)
61 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio)
62 | tmpstr += ")"
63 | return tmpstr
64 |
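Not part of the repository: a usage sketch for the module above. It assumes the compiled models.ops extension (built via make.sh) and apex are installed, a CUDA device is available, and that rois are given as (batch_index, x1, y1, x2, y2) rows in image coordinates:

    import torch
    from rcnn.ops import ROIAlign

    features = torch.randn(1, 256, 50, 50, device="cuda")           # e.g. one FPN level
    rois = torch.tensor([[0., 10., 10., 200., 150.]], device="cuda")
    pooler = ROIAlign(output_size=(7, 7), spatial_scale=1. / 8, sampling_ratio=2, aligned=True)
    crops = pooler(features, rois)                                    # (1, 256, 7, 7)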
--------------------------------------------------------------------------------
/rcnn/ops/roi_pool.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.autograd import Function
4 | from torch.autograd.function import once_differentiable
5 | from torch.nn.modules.utils import _pair
6 |
7 | from models.ops import _C
8 |
9 | from apex import amp
10 |
11 |
12 | class _ROIPool(Function):
13 | @staticmethod
14 | def forward(ctx, input, roi, output_size, spatial_scale):
15 | ctx.output_size = _pair(output_size)
16 | ctx.spatial_scale = spatial_scale
17 | ctx.input_shape = input.size()
18 | output, argmax = _C.roi_pool_forward(
19 | input, roi, spatial_scale, output_size[0], output_size[1]
20 | )
21 | ctx.save_for_backward(input, roi, argmax)
22 | return output
23 |
24 | @staticmethod
25 | @once_differentiable
26 | def backward(ctx, grad_output):
27 | input, rois, argmax = ctx.saved_tensors
28 | output_size = ctx.output_size
29 | spatial_scale = ctx.spatial_scale
30 | bs, ch, h, w = ctx.input_shape
31 | grad_input = _C.roi_pool_backward(
32 | grad_output, input, rois, argmax, spatial_scale, output_size[0], output_size[1], bs, ch, h, w,
33 | )
34 | return grad_input, None, None, None
35 |
36 |
37 | roi_pool = _ROIPool.apply
38 |
39 |
40 | class ROIPool(nn.Module):
41 | def __init__(self, output_size, spatial_scale):
42 | super(ROIPool, self).__init__()
43 | self.output_size = output_size
44 | self.spatial_scale = spatial_scale
45 |
46 | @amp.float_function
47 | def forward(self, input, rois):
48 | return roi_pool(input, rois, self.output_size, self.spatial_scale)
49 |
50 | def __repr__(self):
51 | tmpstr = self.__class__.__name__ + "("
52 | tmpstr += "output_size=" + str(self.output_size)
53 | tmpstr += ", spatial_scale=" + str(self.spatial_scale)
54 | tmpstr += ")"
55 | return tmpstr
56 |
--------------------------------------------------------------------------------
/rcnn/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/utils/__init__.py
--------------------------------------------------------------------------------
/rcnn/utils/balanced_positive_negative_sampler.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class BalancedPositiveNegativeSampler(object):
5 | """
6 | This class samples batches, ensuring that they contain a fixed proportion of positives
7 | """
8 |
9 | def __init__(self, batch_size_per_image, positive_fraction):
10 | """
11 | Arguments:
12 | batch_size_per_image (int): number of elements to be selected per image
13 |             positive_fraction (float): percentage of positive elements per batch
14 | """
15 | self.batch_size_per_image = batch_size_per_image
16 | self.positive_fraction = positive_fraction
17 |
18 | def __call__(self, matched_idxs):
19 | """
20 | Arguments:
21 | matched idxs: list of tensors containing -1, 0 or positive values.
22 | Each tensor corresponds to a specific image.
23 | -1 values are ignored, 0 are considered as negatives and > 0 as
24 | positives.
25 |
26 | Returns:
27 | pos_idx (list[tensor])
28 | neg_idx (list[tensor])
29 |
30 | Returns two lists of binary masks for each image.
31 | The first list contains the positive elements that were selected,
32 |         and the second list the negative examples.
33 | """
34 | pos_idx = []
35 | neg_idx = []
36 | for matched_idxs_per_image in matched_idxs:
37 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1)
38 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1)
39 |
40 | num_pos = int(self.batch_size_per_image * self.positive_fraction)
41 | # protect against not enough positive examples
42 | num_pos = min(positive.numel(), num_pos)
43 | num_neg = self.batch_size_per_image - num_pos
44 | # protect against not enough negative examples
45 | num_neg = min(negative.numel(), num_neg)
46 |
47 | # randomly select positive and negative examples
48 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]
49 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]
50 |
51 | pos_idx_per_image = positive[perm1]
52 | neg_idx_per_image = negative[perm2]
53 |
54 | # create binary mask from indices
55 | pos_idx_per_image_mask = torch.zeros_like(
56 | matched_idxs_per_image, dtype=torch.uint8
57 | )
58 | neg_idx_per_image_mask = torch.zeros_like(
59 | matched_idxs_per_image, dtype=torch.uint8
60 | )
61 | pos_idx_per_image_mask[pos_idx_per_image] = 1
62 | neg_idx_per_image_mask[neg_idx_per_image] = 1
63 |
64 | pos_idx.append(pos_idx_per_image_mask)
65 | neg_idx.append(neg_idx_per_image_mask)
66 |
67 | return pos_idx, neg_idx
68 |
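Not part of the repository: a toy illustration of the sampler contract above, where -1 entries are ignored, 0 counts as negative and values >= 1 as positive (the numbers are made up for the example):

    import torch
    from rcnn.utils.balanced_positive_negative_sampler import BalancedPositiveNegativeSampler

    matched_idxs = [torch.tensor([-1, 0, 2, 0, 5, 0, 0, 1])]
    sampler = BalancedPositiveNegativeSampler(batch_size_per_image=4, positive_fraction=0.5)
    pos_masks, neg_masks = sampler(matched_idxs)
    print(pos_masks[0].sum().item(), neg_masks[0].sum().item())     # 2 positives, 2 negatives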
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.13
2 | pyyaml>=3.12
3 | matplotlib
4 | opencv-python>=3.4.0
5 | setuptools
6 | Cython
7 | scipy
8 | six
9 | scikit-image
10 | Pillow>=6.1.0
11 |
--------------------------------------------------------------------------------
/tools/_init_paths.py:
--------------------------------------------------------------------------------
1 | """Add {PROJECT_ROOT}/lib. to PYTHONPATH
2 |
3 | Usage:
4 | import this module before importing any project modules
5 | e.g.
6 | import _init_paths
7 | from core.config import cfg
8 | """
9 |
10 | import os.path as osp
11 | import sys
12 |
13 |
14 | def add_path(path):
15 | if path not in sys.path:
16 | sys.path.insert(0, path)
17 |
18 |
19 | this_dir = osp.abspath(osp.dirname(osp.dirname(__file__)))
20 |
21 | # Add project root to PYTHONPATH
22 | lib_path = osp.join(this_dir)
23 | add_path(lib_path)
24 |
--------------------------------------------------------------------------------
/tools/test_net.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 |
4 | import _init_paths # pylint: disable=unused-import
5 | from utils.misc import mkdir_p, logging_rank
6 |
7 | from rcnn.core.config import cfg, merge_cfg_from_file, merge_cfg_from_list, assert_and_infer_cfg
8 | from rcnn.core.test_engine import run_inference
9 |
10 | # Parse arguments
11 | parser = argparse.ArgumentParser(description='Parsing R-CNN Model Testing')
12 | parser.add_argument('--cfg', dest='cfg_file',
13 | help='optional config file',
14 | default='./cfgs/mscoco_humanparts/e2e_hier_rcnn_R-50-FPN_1x.yaml', type=str)
15 | parser.add_argument('--gpu_id', type=str, default='0,1,2,3,4,5,6,7', help='gpu id for evaluation')
16 | parser.add_argument('--range', help='start (inclusive) and end (exclusive) indices', type=int, nargs=2)
17 | parser.add_argument('opts', help='See rcnn/core/config.py for all options',
18 | default=None,
19 | nargs=argparse.REMAINDER)
20 | args = parser.parse_args()
21 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
22 |
23 |
24 | def main():
25 | if len(args.gpu_id.split(',')) == 1:
26 | local_rank = int(args.gpu_id.split(',')[0])
27 | else:
28 | local_rank = -1
29 | args.local_rank = local_rank
30 |
31 | num_gpus = len(args.gpu_id.split(','))
32 | multi_gpu_testing = True if num_gpus > 1 else False
33 |
34 | if args.cfg_file is not None:
35 | merge_cfg_from_file(args.cfg_file)
36 | if args.opts is not None:
37 | merge_cfg_from_list(args.opts)
38 |
39 | if not os.path.isdir(os.path.join(cfg.CKPT, 'test')):
40 | mkdir_p(os.path.join(cfg.CKPT, 'test'))
41 | if cfg.VIS.ENABLED:
42 | if not os.path.exists(os.path.join(cfg.CKPT, 'vis')):
43 | mkdir_p(os.path.join(cfg.CKPT, 'vis'))
44 |
45 | assert_and_infer_cfg(make_immutable=False)
46 | args.test_net_file, _ = os.path.splitext(__file__)
47 | run_inference(
48 | args,
49 | ind_range=args.range,
50 | multi_gpu_testing=multi_gpu_testing
51 | )
52 |
53 |
54 | if __name__ == '__main__':
55 | main()
56 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/utils/__init__.py
--------------------------------------------------------------------------------
/utils/collections.py:
--------------------------------------------------------------------------------
1 | class AttrDict(dict):
2 |
3 | IMMUTABLE = '__immutable__'
4 |
5 | def __init__(self, *args, **kwargs):
6 | super(AttrDict, self).__init__(*args, **kwargs)
7 | self.__dict__[AttrDict.IMMUTABLE] = False
8 |
9 | def __getattr__(self, name):
10 | if name in self.__dict__:
11 | return self.__dict__[name]
12 | elif name in self:
13 | return self[name]
14 | else:
15 | raise AttributeError(name)
16 |
17 | def __setattr__(self, name, value):
18 | if not self.__dict__[AttrDict.IMMUTABLE]:
19 | if name in self.__dict__:
20 | self.__dict__[name] = value
21 | else:
22 | self[name] = value
23 | else:
24 | raise AttributeError(
25 | 'Attempted to set "{}" to "{}", but AttrDict is immutable'.
26 | format(name, value)
27 | )
28 |
29 | def immutable(self, is_immutable):
30 | """Set immutability to is_immutable and recursively apply the setting
31 | to all nested AttrDicts.
32 | """
33 | self.__dict__[AttrDict.IMMUTABLE] = is_immutable
34 | # Recursively set immutable state
35 | for v in self.__dict__.values():
36 | if isinstance(v, AttrDict):
37 | v.immutable(is_immutable)
38 | for v in self.values():
39 | if isinstance(v, AttrDict):
40 | v.immutable(is_immutable)
41 |
42 | def is_immutable(self):
43 | return self.__dict__[AttrDict.IMMUTABLE]
44 |
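Not part of the repository: a small usage sketch for AttrDict (the config keys below are illustrative only). Attribute-style reads and writes go through the underlying dict, and immutable(True) recursively freezes nested AttrDicts:

    from utils.collections import AttrDict

    cfg = AttrDict()
    cfg.MODEL = AttrDict()
    cfg.MODEL.NUM_CLASSES = 81                 # stored as cfg['MODEL']['NUM_CLASSES']
    assert cfg['MODEL']['NUM_CLASSES'] == 81

    cfg.immutable(True)
    try:
        cfg.MODEL.NUM_CLASSES = 2              # now raises AttributeError
    except AttributeError:
        pass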
--------------------------------------------------------------------------------
/utils/comm.py:
--------------------------------------------------------------------------------
1 | """
2 | This file contains primitives for multi-gpu communication.
3 | This is useful when doing distributed training.
4 | """
5 |
6 | import pickle
7 | import time
8 | import numpy as np
9 |
10 | import torch
11 | import torch.distributed as dist
12 |
13 |
14 | def get_world_size():
15 | if not dist.is_available():
16 | return 1
17 | if not dist.is_initialized():
18 | return 1
19 | return dist.get_world_size()
20 |
21 |
22 | def get_rank():
23 | if not dist.is_available():
24 | return 0
25 | if not dist.is_initialized():
26 | return 0
27 | return dist.get_rank()
28 |
29 |
30 | def is_main_process():
31 | return get_rank() == 0
32 |
33 |
34 | def synchronize():
35 | """
36 | Helper function to synchronize (barrier) among all processes when
37 | using distributed training
38 | """
39 | if not dist.is_available():
40 | return
41 | if not dist.is_initialized():
42 | return
43 | world_size = dist.get_world_size()
44 | if world_size == 1:
45 | return
46 | dist.barrier()
47 |
48 |
49 | def all_gather(data):
50 | """
51 | Run all_gather on arbitrary picklable data (not necessarily tensors)
52 | Args:
53 | data: any picklable object
54 | Returns:
55 | list[data]: list of data gathered from each rank
56 | """
57 | world_size = get_world_size()
58 | if world_size == 1:
59 | return [data]
60 |
61 | # serialized to a Tensor
62 | buffer = pickle.dumps(data)
63 | storage = torch.ByteStorage.from_buffer(buffer)
64 | tensor = torch.ByteTensor(storage).to("cuda")
65 |
66 | # obtain Tensor size of each rank
67 | local_size = torch.LongTensor([tensor.numel()]).to("cuda")
68 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)]
69 | dist.all_gather(size_list, local_size)
70 | size_list = [int(size.item()) for size in size_list]
71 | max_size = max(size_list)
72 |
73 | # receiving Tensor from all ranks
74 | # we pad the tensor because torch all_gather does not support
75 | # gathering tensors of different shapes
76 | tensor_list = []
77 | for _ in size_list:
78 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda"))
79 | if local_size != max_size:
80 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda")
81 | tensor = torch.cat((tensor, padding), dim=0)
82 | dist.all_gather(tensor_list, tensor)
83 |
84 | data_list = []
85 | for size, tensor in zip(size_list, tensor_list):
86 | buffer = tensor.cpu().numpy().tobytes()[:size]
87 | data_list.append(pickle.loads(buffer))
88 |
89 | return data_list
90 |
91 |
92 | def shared_random_seed():
93 | """
94 | Returns:
95 | int: a random number that is the same across all workers.
96 | If workers need a shared RNG, they can use this shared seed to
97 | create one.
98 |
99 | All workers must call this function, otherwise it will deadlock.
100 | """
101 | ints = np.random.randint(2 ** 31)
102 | all_ints = all_gather(ints)
103 | return all_ints[0]
104 |
105 |
106 | def reduce_dict(input_dict, average=True):
107 | """
108 | Args:
109 | input_dict (dict): all the values will be reduced
110 | average (bool): whether to do average or sum
111 | Reduce the values in the dictionary from all processes so that process with rank
112 | 0 has the averaged results. Returns a dict with the same fields as
113 | input_dict, after reduction.
114 | """
115 | world_size = get_world_size()
116 | if world_size < 2:
117 | return input_dict
118 | with torch.no_grad():
119 | names = []
120 | values = []
121 | # sort the keys so that they are consistent across processes
122 | for k in sorted(input_dict.keys()):
123 | names.append(k)
124 | values.append(input_dict[k])
125 | values = torch.stack(values, dim=0)
126 | dist.reduce(values, dst=0)
127 | if dist.get_rank() == 0 and average:
128 | # only main process gets accumulated, so only divide by
129 | # world_size in this case
130 | values /= world_size
131 | reduced_dict = {k: v for k, v in zip(names, values)}
132 | return reduced_dict
133 |
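Not part of the repository: a sketch of how reduce_dict might be used to average a loss dictionary for logging. It assumes torch.distributed has already been initialized by the training launcher; with a single process it simply returns the input dict:

    import torch
    from utils.comm import reduce_dict, is_main_process

    loss_dict = {"loss_cls": torch.tensor(0.7, device="cuda"),
                 "loss_box": torch.tensor(0.3, device="cuda")}
    loss_dict_reduced = reduce_dict(loss_dict)        # averaged result lands on rank 0
    if is_main_process():
        total_loss = sum(loss_dict_reduced.values())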
--------------------------------------------------------------------------------
/utils/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/utils/data/__init__.py
--------------------------------------------------------------------------------
/utils/data/collate_batch.py:
--------------------------------------------------------------------------------
1 | from utils.data.structures.image_list import to_image_list
2 |
3 |
4 | class BatchCollator(object):
5 | """
6 | From a list of samples from the dataset,
7 | returns the batched images and targets.
8 | This should be passed to the DataLoader
9 | """
10 |
11 | def __init__(self, size_divisible=0):
12 | self.size_divisible = size_divisible
13 |
14 | def __call__(self, batch):
15 | transposed_batch = list(zip(*batch))
16 | images = to_image_list(transposed_batch[0], self.size_divisible)
17 | targets = transposed_batch[1]
18 | img_ids = transposed_batch[2]
19 | return images, targets, img_ids
20 |
--------------------------------------------------------------------------------
/utils/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .coco import COCODataset
2 | from .concat_dataset import ConcatDataset
3 |
--------------------------------------------------------------------------------
/utils/data/datasets/concat_dataset.py:
--------------------------------------------------------------------------------
1 | import bisect
2 |
3 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
4 |
5 |
6 | class ConcatDataset(_ConcatDataset):
7 | """
8 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra
9 |     method for querying the sizes of the images
10 | """
11 |
12 | def get_idxs(self, idx):
13 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
14 | if dataset_idx == 0:
15 | sample_idx = idx
16 | else:
17 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
18 | return dataset_idx, sample_idx
19 |
20 | def get_img_info(self, idx):
21 | dataset_idx, sample_idx = self.get_idxs(idx)
22 | return self.datasets[dataset_idx].get_img_info(sample_idx)
23 |
--------------------------------------------------------------------------------
/utils/data/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .distributed import DistributedSampler
2 | from .repeat_factor import RepeatFactorTrainingSampler
3 | from .grouped_batch_sampler import GroupedBatchSampler
4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler
5 | from .range_sampler import RangeSampler
6 |
--------------------------------------------------------------------------------
/utils/data/samplers/distributed.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import torch.distributed as dist
4 | from torch.utils.data.sampler import Sampler
5 |
6 |
7 | class DistributedSampler(Sampler):
8 | """Sampler that restricts data loading to a subset of the dataset.
9 | It is especially useful in conjunction with
10 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
11 | process can pass a DistributedSampler instance as a DataLoader sampler,
12 | and load a subset of the original dataset that is exclusive to it.
13 | .. note::
14 | Dataset is assumed to be of constant size.
15 | Arguments:
16 | dataset: Dataset used for sampling.
17 | num_replicas (optional): Number of processes participating in
18 | distributed training.
19 | rank (optional): Rank of the current process within num_replicas.
20 | """
21 |
22 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
23 | if num_replicas is None:
24 | if not dist.is_available():
25 | raise RuntimeError("Requires distributed package to be available")
26 | num_replicas = dist.get_world_size()
27 | if rank is None:
28 | if not dist.is_available():
29 | raise RuntimeError("Requires distributed package to be available")
30 | rank = dist.get_rank()
31 | self.dataset = dataset
32 | self.num_replicas = num_replicas
33 | self.rank = rank
34 | self.epoch = 0
35 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
36 | self.total_size = self.num_samples * self.num_replicas
37 | self.shuffle = shuffle
38 |
39 | def __iter__(self):
40 | if self.shuffle:
41 | # deterministically shuffle based on epoch
42 | g = torch.Generator()
43 | g.manual_seed(self.epoch)
44 | indices = torch.randperm(len(self.dataset), generator=g).tolist()
45 | else:
46 | indices = torch.arange(len(self.dataset)).tolist()
47 |
48 | # add extra samples to make it evenly divisible
49 | indices += indices[: (self.total_size - len(indices))]
50 | assert len(indices) == self.total_size
51 |
52 | # subsample
53 | offset = self.num_samples * self.rank
54 | indices = indices[offset : offset + self.num_samples]
55 | assert len(indices) == self.num_samples
56 |
57 | return iter(indices)
58 |
59 | def __len__(self):
60 | return self.num_samples
61 |
62 | def set_epoch(self, epoch):
63 | self.epoch = epoch
64 |
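Not part of the repository: a wiring sketch for the sampler above. rank and num_replicas are passed explicitly here so the example runs without an initialized process group (normally they are inferred from torch.distributed), and set_epoch is called each epoch so the shuffling seed changes:

    import torch
    from torch.utils.data import DataLoader, TensorDataset
    from utils.data.samplers import DistributedSampler

    dataset = TensorDataset(torch.arange(10, dtype=torch.float32))
    sampler = DistributedSampler(dataset, num_replicas=2, rank=0, shuffle=True)
    loader = DataLoader(dataset, batch_size=2, sampler=sampler)

    for epoch in range(2):
        sampler.set_epoch(epoch)
        for batch in loader:
            pass                                # 5 of the 10 samples land on this rank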
--------------------------------------------------------------------------------
/utils/data/samplers/iteration_based_batch_sampler.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data.sampler import BatchSampler
2 |
3 |
4 | class IterationBasedBatchSampler(BatchSampler):
5 | """
6 | Wraps a BatchSampler, resampling from it until
7 | a specified number of iterations have been sampled
8 | """
9 |
10 | def __init__(self, batch_sampler, num_iterations, start_iter=0):
11 | self.batch_sampler = batch_sampler
12 | self.num_iterations = num_iterations
13 | self.start_iter = start_iter
14 |
15 | def __iter__(self):
16 | iteration = self.start_iter
17 | while iteration <= self.num_iterations:
18 | # if the underlying sampler has a set_epoch method, like
19 | # DistributedSampler, used for making each process see
20 | # a different split of the dataset, then set it
21 | if hasattr(self.batch_sampler.sampler, "set_epoch"):
22 | self.batch_sampler.sampler.set_epoch(iteration)
23 | for batch in self.batch_sampler:
24 | iteration += 1
25 | if iteration > self.num_iterations:
26 | break
27 | yield batch
28 |
29 | def __len__(self):
30 | return self.num_iterations
31 |
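Not part of the repository: a toy illustration of the wrapper above, where a 10-element dataset is re-sampled until exactly num_iterations batches have been produced, regardless of the dataset length:

    from torch.utils.data.sampler import BatchSampler, SequentialSampler
    from utils.data.samplers import IterationBasedBatchSampler

    base = BatchSampler(SequentialSampler(range(10)), batch_size=4, drop_last=False)
    batch_sampler = IterationBasedBatchSampler(base, num_iterations=7, start_iter=0)
    batches = list(batch_sampler)
    assert len(batches) == 7                    # cycles over the underlying sampler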
--------------------------------------------------------------------------------
/utils/data/samplers/range_sampler.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.utils.data.sampler import Sampler
3 |
4 |
5 | class RangeSampler(Sampler):
6 | def __init__(self, start_ind, end_ind):
7 | self.start_ind = start_ind
8 | self.end_ind = end_ind
9 |
10 | def __iter__(self):
11 | indices = torch.arange(self.start_ind, self.end_ind).tolist()
12 | return iter(indices)
13 |
14 | def __len__(self):
15 | return self.end_ind - self.start_ind
16 |
--------------------------------------------------------------------------------
/utils/data/structures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/utils/data/structures/__init__.py
--------------------------------------------------------------------------------
/utils/data/structures/image_list.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class ImageList(object):
5 | """
6 | Structure that holds a list of images (of possibly
7 | varying sizes) as a single tensor.
8 | This works by padding the images to the same size,
9 | and storing in a field the original sizes of each image
10 | """
11 |
12 | def __init__(self, tensors, image_sizes):
13 | """
14 | Arguments:
15 | tensors (tensor)
16 | image_sizes (list[tuple[int, int]])
17 | """
18 | self.tensors = tensors
19 | self.image_sizes = image_sizes
20 |
21 | def to(self, *args, **kwargs):
22 | cast_tensor = self.tensors.to(*args, **kwargs)
23 | return ImageList(cast_tensor, self.image_sizes)
24 |
25 |
26 | def to_image_list(tensors, size_divisible=0):
27 | """
28 | tensors can be an ImageList, a torch.Tensor or
29 | an iterable of Tensors. It can't be a numpy array.
30 | When tensors is an iterable of Tensors, it pads
31 | the Tensors with zeros so that they have the same
32 | shape
33 | """
34 | if isinstance(tensors, torch.Tensor) and size_divisible > 0:
35 | tensors = [tensors]
36 |
37 | if isinstance(tensors, ImageList):
38 | return tensors
39 | elif isinstance(tensors, torch.Tensor):
40 | # single tensor shape can be inferred
41 | if tensors.dim() == 3:
42 | tensors = tensors[None]
43 | assert tensors.dim() == 4
44 | image_sizes = [tensor.shape[-2:] for tensor in tensors]
45 | return ImageList(tensors, image_sizes)
46 | elif isinstance(tensors, (tuple, list)):
47 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors]))
48 |
49 |         # TODO Ideally, just remove this and let the model handle arbitrary
50 |         # input sizes
51 | if size_divisible > 0:
52 | import math
53 |
54 | stride = size_divisible
55 | max_size = list(max_size)
56 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride)
57 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride)
58 | max_size = tuple(max_size)
59 |
60 | batch_shape = (len(tensors),) + max_size
61 | batched_imgs = tensors[0].new(*batch_shape).zero_()
62 | for img, pad_img in zip(tensors, batched_imgs):
63 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
64 |
65 | image_sizes = [im.shape[-2:] for im in tensors]
66 |
67 | return ImageList(batched_imgs, image_sizes)
68 | else:
69 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors)))
70 |
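Not part of the repository: a padding sketch for to_image_list. Two images of different sizes are zero-padded into one batched tensor whose spatial dimensions are rounded up to a multiple of size_divisible, while the original sizes are kept in image_sizes:

    import torch
    from utils.data.structures.image_list import to_image_list

    imgs = [torch.randn(3, 37, 50), torch.randn(3, 40, 45)]
    image_list = to_image_list(imgs, size_divisible=32)
    print(image_list.tensors.shape)             # torch.Size([2, 3, 64, 64])
    print(image_list.image_sizes)               # [torch.Size([37, 50]), torch.Size([40, 45])]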
--------------------------------------------------------------------------------
/utils/data/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | from .transforms import *
2 |
--------------------------------------------------------------------------------
/utils/image.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 |
5 | def aspect_ratio_rel(im, aspect_ratio):
6 | """Performs width-relative aspect ratio transformation."""
7 | im_h, im_w = im.shape[:2]
8 | im_ar_w = int(round(aspect_ratio * im_w))
9 | im_ar = cv2.resize(im, dsize=(im_ar_w, im_h))
10 | return im_ar
11 |
12 |
13 | def aspect_ratio_abs(im, aspect_ratio):
14 | """Performs absolute aspect ratio transformation."""
15 | im_h, im_w = im.shape[:2]
16 | im_area = im_h * im_w
17 |
18 | im_ar_w = np.sqrt(im_area * aspect_ratio)
19 | im_ar_h = np.sqrt(im_area / aspect_ratio)
20 | assert np.isclose(im_ar_w / im_ar_h, aspect_ratio)
21 |
22 | im_ar = cv2.resize(im, dsize=(int(im_ar_w), int(im_ar_h)))
23 | return im_ar
24 |
--------------------------------------------------------------------------------
/utils/misc.py:
--------------------------------------------------------------------------------
1 | import errno
2 | import os
3 | import logging
4 | import numpy as np
5 | from six.moves import cPickle as pickle
6 |
7 | import torch
8 | import torch.distributed as dist
9 |
10 | # Set up logging and load config options
11 | logging.basicConfig(level=logging.INFO)
12 | logger = logging.getLogger(__name__)
13 |
14 |
15 | # logging only in rank 0
16 | def logging_rank(sstr, distributed=True, local_rank=0):
17 | if distributed and local_rank == 0:
18 | logger.info(sstr)
19 | elif not distributed:
20 | logger.info(sstr)
21 | return 0
22 |
23 |
24 | def get_mean_and_std(dataset):
25 | """Compute the mean and std value of dataset."""
26 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)
27 |
28 | mean = torch.zeros(3)
29 | std = torch.zeros(3)
30 | logger.info('Computing mean and std..')
31 | for inputs, targets in dataloader:
32 | for i in range(3):
33 | mean[i] += inputs[:, i, :, :].mean()
34 | std[i] += inputs[:, i, :, :].std()
35 | mean.div_(len(dataset))
36 | std.div_(len(dataset))
37 | return mean, std
38 |
39 |
40 | def mkdir_p(path):
41 | """make dir if not exist"""
42 | try:
43 | os.makedirs(path)
44 | except OSError as exc: # Python >2.5
45 | if exc.errno == errno.EEXIST and os.path.isdir(path):
46 | pass
47 | else:
48 | raise
49 |
50 |
51 | def save_object(obj, file_name):
52 | """Save a Python object by pickling it."""
53 | file_name = os.path.abspath(file_name)
54 | with open(file_name, 'wb') as f:
55 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
56 |
57 |
58 | def get_world_size() -> int:
59 | if not dist.is_available():
60 | return 1
61 | if not dist.is_initialized():
62 | return 1
63 | return dist.get_world_size()
64 |
--------------------------------------------------------------------------------
/utils/optimizer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from utils.misc import logging_rank
5 |
6 |
7 | class Optimizer(object):
8 | def __init__(self, model, solver, local_rank=0):
9 | self.model = model
10 | self.solver = solver
11 | self.local_rank = local_rank
12 |
13 | self.bias_params_list = []
14 | self.gn_params_list = []
15 | self.nonbias_params_list = []
16 |
17 | self.params = []
18 | self.gn_param_nameset = self.get_gn_param_nameset()
19 |
20 | def get_gn_param_nameset(self):
21 | gn_param_nameset = set()
22 | for name, module in self.model.named_modules():
23 | if isinstance(module, nn.GroupNorm):
24 | gn_param_nameset.add(name + '.weight')
25 | gn_param_nameset.add(name + '.bias')
26 | return gn_param_nameset
27 |
28 | def get_params_list(self):
29 | for key, value in self.model.named_parameters():
30 | if value.requires_grad:
31 | if 'bias' in key:
32 | self.bias_params_list.append(value)
33 | elif key in self.gn_param_nameset:
34 | self.gn_params_list.append(value)
35 | else:
36 | self.nonbias_params_list.append(value)
37 | else:
38 | logging_rank('{} does not need grad.'.format(key), local_rank=self.local_rank)
39 |
40 | def get_params(self):
41 | self.params += [
42 | {'params': self.nonbias_params_list,
43 | 'lr': 0,
44 | 'weight_decay': self.solver.WEIGHT_DECAY,
45 | 'lr_scale': 1},
46 | {'params': self.bias_params_list,
47 | 'lr': 0 * (self.solver.BIAS_DOUBLE_LR + 1),
48 | 'weight_decay': self.solver.WEIGHT_DECAY if self.solver.BIAS_WEIGHT_DECAY else 0,
49 | 'lr_scale': self.solver.BIAS_DOUBLE_LR + 1},
50 | {'params': self.gn_params_list,
51 | 'lr': 0,
52 | 'weight_decay': self.solver.WEIGHT_DECAY_GN * self.solver.WEIGHT_DECAY,
53 | 'lr_scale': 1}
54 | ]
55 |
56 | def build(self):
57 | assert self.solver.OPTIMIZER in ['SGD', 'RMSPROP', 'ADAM']
58 | self.get_params_list()
59 | self.get_params()
60 |
61 | if self.solver.OPTIMIZER == 'SGD':
62 | optimizer = torch.optim.SGD(
63 | self.params,
64 | momentum=self.solver.MOMENTUM
65 | )
66 | elif self.solver.OPTIMIZER == 'RMSPROP':
67 | optimizer = torch.optim.RMSprop(
68 | self.params,
69 | momentum=self.solver.MOMENTUM
70 | )
71 | elif self.solver.OPTIMIZER == 'ADAM':
72 | optimizer = torch.optim.Adam(
73 | self.model.parameters(),
74 | lr=self.solver.BASE_LR
75 | )
76 | else:
77 | optimizer = None
78 | return optimizer
79 |
--------------------------------------------------------------------------------
/utils/registry.py:
--------------------------------------------------------------------------------
1 | def _register_generic(module_dict, module_name, module):
2 | assert module_name not in module_dict
3 | module_dict[module_name] = module
4 |
5 |
6 | class Registry(dict):
7 | """
8 |     A helper class for managing registering modules; it extends a dictionary
9 |     and provides a register function.
10 |     Eg. creating a registry:
11 | some_registry = Registry({"default": default_module})
12 | There're two ways of registering new modules:
13 | 1): normal way is just calling register function:
14 | def foo():
15 | ...
16 | some_registry.register("foo_module", foo)
17 | 2): used as decorator when declaring the module:
18 | @some_registry.register("foo_module")
19 |         @some_registry.register("foo_module_nickname")
20 | def foo():
21 | ...
22 | Access of module is just like using a dictionary, eg:
23 |         f = some_registry["foo_module"]
24 | """
25 | def __init__(self, *args, **kwargs):
26 | super(Registry, self).__init__(*args, **kwargs)
27 |
28 | def register(self, module_name, module=None):
29 | # used as function call
30 | if module is not None:
31 | _register_generic(self, module_name, module)
32 | return
33 |
34 | # used as decorator
35 | def register_fn(fn):
36 | _register_generic(self, module_name, fn)
37 | return fn
38 |
39 | return register_fn
40 |
41 |
--------------------------------------------------------------------------------
/utils/subprocess.py:
--------------------------------------------------------------------------------
1 | import os
2 | import yaml
3 | import subprocess
4 | import numpy as np
5 | from io import IOBase
6 | from six.moves import shlex_quote
7 | from six.moves import cPickle as pickle
8 |
9 | from utils.misc import logging_rank
10 |
11 |
12 | def process_in_parallel(tag, total_range_size, binary, cfg, ckpt_path):
13 | """Run the specified binary NUM_GPUS times in parallel, each time as a
14 | subprocess that uses one GPU. The binary must accept the command line
15 | arguments `--range {start} {end}` that specify a data processing range.
16 | """
17 | # subprocesses
18 | cfg_file = os.path.join(ckpt_path, 'test', '{}_range_config.yaml'.format(tag))
19 | with open(cfg_file, 'w') as f:
20 | yaml.dump(cfg, stream=f)
21 | subprocess_env = os.environ.copy()
22 | processes = []
23 | # Determine GPUs to use
24 | cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
25 | if cuda_visible_devices:
26 | gpu_inds = list(map(int, cuda_visible_devices.split(',')))
27 | assert -1 not in gpu_inds, \
28 | 'Hiding GPU indices using the \'-1\' index is not supported'
29 | else:
30 | raise NotImplementedError
31 | subinds = np.array_split(range(total_range_size), len(gpu_inds))
32 | # Run the binary in cfg.NUM_GPUS subprocesses
33 | for i, gpu_ind in enumerate(gpu_inds):
34 | start = subinds[i][0]
35 | end = subinds[i][-1] + 1
36 | subprocess_env['CUDA_VISIBLE_DEVICES'] = str(gpu_ind)
37 | cmd = ('python {binary} --range {start} {end} --cfg {cfg_file} --gpu_id {gpu_id}')
38 | cmd = cmd.format(
39 | binary=shlex_quote(binary),
40 | start=int(start),
41 | end=int(end),
42 | cfg_file=shlex_quote(cfg_file),
43 | gpu_id=str(gpu_ind),
44 | )
45 | logging_rank('{} range command {}: {}'.format(tag, i, cmd))
46 | if i == 0:
47 | subprocess_stdout = subprocess.PIPE
48 | else:
49 | filename = os.path.join(ckpt_path, 'test', '%s_range_%s_%s.stdout' % (tag, start, end))
50 | subprocess_stdout = open(filename, 'w')
51 | p = subprocess.Popen(
52 | cmd,
53 | shell=True,
54 | env=subprocess_env,
55 | stdout=subprocess_stdout,
56 | stderr=subprocess.STDOUT,
57 | bufsize=1
58 | )
59 | processes.append((i, p, start, end, subprocess_stdout))
60 | # Log output from inference processes and collate their results
61 | outputs = []
62 | for i, p, start, end, subprocess_stdout in processes:
63 | log_subprocess_output(i, p, ckpt_path, tag, start, end)
64 | if isinstance(subprocess_stdout, IOBase):
65 | subprocess_stdout.close()
66 | range_file = os.path.join(ckpt_path, 'test', '%s_range_%s_%s.pkl' % (tag, start, end))
67 | range_data = pickle.load(open(range_file, 'rb'))
68 | outputs.append(range_data)
69 | return outputs
70 |
71 |
72 | def log_subprocess_output(i, p, ckpt_path, tag, start, end):
73 | """Capture the output of each subprocess and log it in the parent process.
74 | The first subprocess's output is logged in realtime. The output from the
75 | other subprocesses is buffered and then printed all at once (in order) when
76 | subprocesses finish.
77 | """
78 | outfile = os.path.join(ckpt_path, 'test', '%s_range_%s_%s.stdout' % (tag, start, end))
79 | logging_rank('# ' + '-' * 76 + ' #')
80 | logging_rank('stdout of subprocess %s with range [%s, %s]' % (i, start + 1, end))
81 | logging_rank('# ' + '-' * 76 + ' #')
82 | if i == 0:
83 | # Stream the piped stdout from the first subprocess in realtime
84 | with open(outfile, 'w') as f:
85 | for line in iter(p.stdout.readline, b''):
86 | print(line.rstrip().decode('ascii'))
87 | f.write(str(line, encoding='ascii'))
88 | p.stdout.close()
89 | ret = p.wait()
90 | else:
91 | # For subprocesses >= 1, wait and dump their log file
92 | ret = p.wait()
93 | with open(outfile, 'r') as f:
94 | print(''.join(f.readlines()))
95 | assert ret == 0, 'Range subprocess failed (exit code: {})'.format(ret)
96 |
--------------------------------------------------------------------------------
/utils/timer.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 |
4 | class Timer(object):
5 | """A simple timer."""
6 |
7 | def __init__(self):
8 | self.reset()
9 |
10 | def tic(self):
11 |         # using time.time instead of time.clock because time.clock
12 | # does not normalize for multithreading
13 | self.start_time = time.time()
14 |
15 | def toc(self, average=True):
16 | self.diff = time.time() - self.start_time
17 | self.total_time += self.diff
18 | self.calls += 1
19 | self.average_time = self.total_time / self.calls
20 | if average:
21 | return self.average_time
22 | else:
23 | return self.diff
24 |
25 | def reset(self):
26 | self.total_time = 0.
27 | self.calls = 0
28 | self.start_time = 0.
29 | self.diff = 0.
30 | self.average_time = 0.
31 |
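Not part of the repository: a minimal usage sketch for Timer, wrapping a block with tic/toc and reading either the per-call time or the running average:

    import time
    from utils.timer import Timer

    t = Timer()
    for _ in range(3):
        t.tic()
        time.sleep(0.01)                        # the timed block
        elapsed = t.toc(average=False)          # time for this call only
    print(t.average_time, t.calls)              # running average over 3 calls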
--------------------------------------------------------------------------------
/weights/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/weights/README.md
--------------------------------------------------------------------------------