├── INSTALL.md ├── LICENSE ├── README.md ├── cfgs ├── CIHP │ ├── e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml │ └── e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml ├── DensePose_COCO │ └── e2e_parsing_rcnn_R-50-FPN_s1x_ms.yaml └── MHP-v2 │ ├── e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml │ └── e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml ├── ckpts └── README.md ├── data ├── output.png └── parsing_rcnn.png ├── make.sh ├── models ├── __init__.py ├── imagenet │ ├── __init__.py │ ├── hrnet.py │ ├── mobilenet_v1.py │ ├── mobilenet_v2.py │ ├── mobilenet_v3.py │ ├── resnet.py │ ├── resnext.py │ ├── utils.py │ └── vovnet.py └── ops │ ├── __init__.py │ ├── adjust_smooth_l1_loss.py │ ├── affine.py │ ├── batch_norm.py │ ├── bilinear_interpolation2d.py │ ├── boxes.py │ ├── context_block.py │ ├── conv2d_samepadding.py │ ├── conv2d_ws.py │ ├── csrc │ ├── PoolPointsInterp.h │ ├── ROIAlign.h │ ├── ROIPool.h │ ├── SigmoidFocalLoss.h │ ├── cpu │ │ ├── ROIAlign_cpu.cpp │ │ ├── nms_cpu.cpp │ │ └── vision.h │ ├── cuda │ │ ├── PoolPointsInterp_cuda.cu │ │ ├── ROIAlign_cuda.cu │ │ ├── ROIPool_cuda.cu │ │ ├── SigmoidFocalLoss_cuda.cu │ │ ├── deform_conv_cuda.cu │ │ ├── deform_conv_kernel_cuda.cu │ │ ├── deform_pool_cuda.cu │ │ ├── deform_pool_kernel_cuda.cu │ │ ├── ml_nms.cu │ │ ├── nms.cu │ │ └── vision.h │ ├── deform_conv.h │ ├── deform_pool.h │ ├── ml_nms.h │ ├── nms.h │ └── vision.cpp │ ├── cython_bbox.c │ ├── cython_bbox.pyx │ ├── cython_nms.c │ ├── cython_nms.pyx │ ├── dcn │ ├── __init__.py │ ├── deform_conv_func.py │ ├── deform_conv_module.py │ ├── deform_pool_func.py │ └── deform_pool_module.py │ ├── dropblock.py │ ├── iou_loss.py │ ├── l2_loss.py │ ├── l2norm.py │ ├── label_smoothing.py │ ├── lovasz_hinge_loss.py │ ├── misc.py │ ├── mixture_batchnorm.py │ ├── nms.py │ ├── nonlocal2d.py │ ├── pool_points_interp.py │ ├── scale.py │ ├── setup_rcnn.py │ ├── setup_ssd.py │ ├── sigmoid_focal_loss.py │ ├── smooth_l1_loss.py │ └── squeeze_excitation.py ├── rcnn ├── __init__.py ├── core │ ├── __init__.py │ ├── config.py │ ├── test.py │ └── test_engine.py ├── datasets │ ├── __init__.py │ ├── dataset.py │ ├── dataset_catalog.py │ ├── evaluation.py │ └── transform.py ├── modeling │ ├── backbone │ │ ├── HRNet.py │ │ ├── MobileNet_v1.py │ │ ├── MobileNet_v2.py │ │ ├── MobileNet_v3.py │ │ ├── ResNeXt.py │ │ ├── ResNet.py │ │ ├── VoVNet.py │ │ └── __init__.py │ ├── cascade_rcnn │ │ ├── __init__.py │ │ ├── cascade_rcnn.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── convfc_heads.py │ │ │ └── mlp_heads.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── outputs.py │ ├── fast_rcnn │ │ ├── __init__.py │ │ ├── fast_rcnn.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── convfc_heads.py │ │ │ └── mlp_heads.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── outputs.py │ ├── fpn │ │ ├── FPN.py │ │ ├── HRFPN.py │ │ └── __init__.py │ ├── keypoint_rcnn │ │ ├── __init__.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── convx_heads.py │ │ │ └── gce_heads.py │ │ ├── inference.py │ │ ├── keypoint_rcnn.py │ │ ├── loss.py │ │ └── outputs.py │ ├── mask_rcnn │ │ ├── __init__.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ └── convx_heads.py │ │ ├── inference.py │ │ ├── loss.py │ │ ├── mask_rcnn.py │ │ └── outputs.py │ ├── model_builder.py │ ├── parsing_rcnn │ │ ├── __init__.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── convx_heads.py │ │ │ └── gce_heads.py │ │ ├── inference.py │ │ ├── loss.py │ │ ├── outputs.py │ │ └── parsing_rcnn.py │ ├── registry.py │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── rpn.py │ └── uv_rcnn │ │ ├── 
heads │ │ ├── __init__.py │ │ ├── convx_heads.py │ │ └── gce_heads.py │ │ ├── inference.py │ │ ├── loss.py │ │ ├── outputs.py │ │ └── uv_rcnn.py ├── ops │ ├── __init__.py │ ├── deform_pool.py │ ├── roi_align.py │ └── roi_pool.py └── utils │ ├── __init__.py │ ├── balanced_positive_negative_sampler.py │ ├── box_coder.py │ ├── matcher.py │ ├── misc.py │ └── poolers.py ├── requirements.txt ├── tools ├── _init_paths.py ├── test_net.py └── train_net.py ├── utils ├── __init__.py ├── checkpointer.py ├── collections.py ├── colormap.py ├── comm.py ├── data │ ├── __init__.py │ ├── collate_batch.py │ ├── dataset_catalog.py │ ├── datasets │ │ ├── __init__.py │ │ ├── coco.py │ │ └── concat_dataset.py │ ├── evaluation │ │ ├── densepose_cocoeval.py │ │ ├── densepose_methods.py │ │ └── parsing_eval.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── grouped_batch_sampler.py │ │ ├── iteration_based_batch_sampler.py │ │ ├── range_sampler.py │ │ └── repeat_factor.py │ ├── structures │ │ ├── __init__.py │ │ ├── bounding_box.py │ │ ├── boxlist_ops.py │ │ ├── densepose_uv.py │ │ ├── image_list.py │ │ ├── keypoint.py │ │ ├── parsing.py │ │ └── segmentation_mask.py │ └── transforms │ │ ├── __init__.py │ │ └── transforms.py ├── image.py ├── logger.py ├── lr_scheduler.py ├── measure.py ├── misc.py ├── net.py ├── optimizer.py ├── registry.py ├── subprocess.py ├── timer.py └── vis.py └── weights └── README.md /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ``` 4 | # install pytorch 1.1 and torchvision 5 | sudo pip3 install torch==1.1 torchvision 6 | 7 | # install apex 8 | cd $INSTALL_DIR 9 | git clone https://github.com/NVIDIA/apex.git 10 | cd apex 11 | sudo python setup.py install --cuda_ext --cpp_ext 12 | 13 | # clone Parsing-R-CNN 14 | git clone https://github.com/soeaver/Parsing-R-CNN.git 15 | 16 | # install other requirements 17 | pip3 install -r requirements.txt 18 | 19 | # make ops 20 | cd Parsing-R-CNN 21 | sh make.sh 22 | 23 | # make cocoapi 24 | cd cocoapi/PythonAPI 25 | make 26 | cd ../../ 27 | ln -s cocoapi/PythonAPI/pycocotools/ ./ 28 | ``` 29 | 30 | ## Data and Pre-train weights 31 | 32 | Make sure the files are organized in the following structure: 33 | 34 | ``` 35 | ├─data 36 | │ ├─coco 37 | │  │ ├─images 38 | │  │ │ ├─train2017 39 | │  │ │ ├─val2017 40 | │ │ ├─annotations 41 | │  │ │ ├─DensePoseData 42 | │  │ │ │ ├─densepose_coco_train2017.json 43 | │  │ │ │ ├─densepose_coco_val2017.json 44 | │  │ │ │ ├─densepose_coco_test2017.json 45 | | | 46 | │ ├─CIHP 47 | │  │ ├─train_img 48 | │  │ ├─train_parsing 49 | │  │ ├─train_seg 50 | │  │ ├─val_img 51 | │  │ ├─val_parsing 52 | │  │ ├─val_seg 53 | │ │ ├─annotations 54 | │  │ │ ├─CIHP_train.json 55 | │  │ │ ├─CIHP_val.json 56 | | | 57 | │ ├─MHP-v2 58 | │  │ ├─train_img 59 | │  │ ├─train_parsing 60 | │  │ ├─train_seg 61 | │  │ ├─val_img 62 | │  │ ├─val_parsing 63 | │  │ ├─val_seg 64 | │ │ ├─annotations 65 | │  │ │ ├─MHP-v2_train.json 66 | │  │ │ ├─MHP-v2_val.json 67 | | 68 | ├─weights 69 | ├─resnet50_caffe.pth 70 | ├─resnet101_caffe.pth 71 | ├─resnext101_32x8d-8ba56ff5.pth 72 | 73 | ``` 74 | 75 | - DensePose estimation uses the original COCO images.
76 | - For training and evaluating DensePose estimation with Parsing R-CNN, you need to fetch the DensePose data following the [original repo](https://github.com/facebookresearch/DensePose/blob/master/INSTALL.md#fetch-densepose-data) 77 | 78 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Parsing-R-CNN 2 | 3 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/parsing-r-cnn-for-instance-level-human/human-part-segmentation-on-cihp)](https://paperswithcode.com/sota/human-part-segmentation-on-cihp?p=parsing-r-cnn-for-instance-level-human) 4 | 5 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/parsing-r-cnn-for-instance-level-human/pose-estimation-on-densepose-coco)](https://paperswithcode.com/sota/pose-estimation-on-densepose-coco?p=parsing-r-cnn-for-instance-level-human) 6 | 7 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/parsing-r-cnn-for-instance-level-human/human-part-segmentation-on-mhp-v20)](https://paperswithcode.com/sota/human-part-segmentation-on-mhp-v20?p=parsing-r-cnn-for-instance-level-human) 8 | 9 | **(New!)** Official implementation of **Parsing R-CNN for Instance-Level Human Analysis (CVPR 2019)** 10 | 11 | ## Citing Parsing R-CNN 12 | 13 | If you use Parsing R-CNN, please cite it with the following BibTeX entry. 14 | 15 | ```BibTeX 16 | @inproceedings{yang2019cvpr, 17 | title = {Parsing R-CNN for Instance-Level Human Analysis}, 18 | author = {Lu Yang and Qing Song and Zhihui Wang and Ming Jiang}, 19 | booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 20 | year = {2019} 21 | } 22 | 23 | ``` 24 | 25 | In this repository, we release the Parsing R-CNN code in PyTorch. 26 | 27 | - Parsing R-CNN architecture: 28 |

29 | 30 | - Parsing R-CNN output: 31 |

32 | 33 | 34 | ## Installation 35 | - 8 x TITAN RTX GPUs 36 | - PyTorch 1.1 37 | - Python 3.6.8 38 | 39 | Install Parsing R-CNN following [INSTALL.md](https://github.com/soeaver/Parsing-R-CNN/blob/master/INSTALL.md#install). 40 | 41 | 42 | ## Dataset 43 | 44 | You need to download the datasets and annotations in the format this repo expects: 45 | 46 | - [CIHP](https://drive.google.com/open?id=1OLBd23ufm6CU8CZmLEYMdF-x2b8mRgxV) 47 | 48 | - [MHP-v2](coming soon) 49 | 50 | - [DensePoseData](https://drive.google.com/open?id=1WiTLYVIgMyCDENXHPVEWW7qbZ-3EBjbt) (using the original [MSCOCO2017](http://cocodataset.org/#download) images) 51 | 52 | Then follow the [data structure](https://github.com/soeaver/Parsing-R-CNN/blob/master/INSTALL.md#data-and-pre-train-weights) to train or evaluate Parsing R-CNN models. 53 | 54 | 55 | ## Results and Models 56 | 57 | **On CIHP val** 58 | 59 | | Backbone | LR | Det AP | mIoU |Parsing (APp50/APvol/PCP50) | DOWNLOAD | 60 | |------------|:----:|:------:|:----:|:--------------------------:| :-------:| 61 | | R-50-FPN | 1x | 65.8 | 52.8 | 57.2/51.2/55.4 | | 62 | | R-50-FPN | 3x | 68.7 | 56.0 | 64.1/54.1/60.7 | [GoogleDrive](https://drive.google.com/open?id=16bASrD7AoCADKzXynIgmdyzmbuzCfAUL)| 63 | 64 | 65 | **On MHP-v2 val** 66 | 67 | | Backbone | LR | Det AP | mIoU |Parsing (APp50/APvol/PCP50) | DOWNLOAD | 68 | |------------|:----:|:------:|:----:|:--------------------------:| :-------:| 69 | | R-50-FPN | 1x | 66.5 | 34.0 | 19.9/36.7/32.4 | | 70 | | R-50-FPN | 3x | 69.0 | 36.1 | 27.4/40.5/38.3 | [GoogleDrive](https://drive.google.com/open?id=1rbSNP4_DoJdNK4l6KHrthO0x4WOFgHGy)| 71 | 72 | 73 | **On DensePose_COCO val** 74 | 75 | | Backbone | LR | Det AP |UV AP (AP/AP50/AP75/APm/APl)| DOWNLOAD | 76 | |------------|:----:|:------:|:--------------------------:| :-------:| 77 | | R-50-FPN | s1x | 57.4 | 59.3/90.5/68.7/56.2/60.8 | [GoogleDrive](https://drive.google.com/open?id=1YQygKoOb5SbZWYnF7f9vEpC_NenpMhH5)| 78 | 79 | 80 | - The new GPSm metric is adopted for evaluating UV results 81 | 82 | 83 | **ImageNet pretrained weights** 84 | 85 | - [R-50](https://drive.google.com/open?id=1EtqFhrFTdBJNbp67effArVrTNx4q_ELr) 86 | - [R-50-GN](https://drive.google.com/open?id=1LzcVD7aADhXXY32DdtKhaY9hTXaduhlg) 87 | - [X-101-32x8d](https://drive.google.com/open?id=1c4OSVZIZtDT49B0DTC0tK3vcRgJpzR9n) 88 | 89 | 90 | ## Visualization 91 | 92 | Coming soon. 93 | 94 | 95 | ## Training 96 | 97 | To train a model with 8 GPUs, run: 98 | ``` 99 | python -m torch.distributed.launch --nproc_per_node=8 tools/train_net.py --cfg cfgs/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml 100 | ``` 101 | 102 | 103 | ## Evaluation 104 | 105 | ### Multi-GPU evaluation 106 | ``` 107 | python tools/test_net.py --cfg ckpts/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml --gpu_id 0,1,2,3,4,5,6,7 108 | ``` 109 | 110 | ### Single-GPU evaluation 111 | ``` 112 | python tools/test_net.py --cfg ckpts/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml --gpu_id 0 113 | ``` 114 | See the end of this README for a sketch of the expected `ckpts/` layout for these commands. 115 | 116 | ## License 117 | Parsing-R-CNN is released under the [MIT license](https://github.com/soeaver/Parsing-R-CNN/blob/master/LICENSE).
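The evaluation commands above read the config from `ckpts/`, which matches the `CKPT` path set in the training configs. One possible layout for the released CIHP 3x model is sketched below; `model_latest.pth` is only a placeholder name, keep whatever file name the downloaded checkpoint actually has:

```
ckpts/
└── CIHP/
    └── e2e_parsing_rcnn_R-50-FPN_3x_ms/
        ├── e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml   # copy of cfgs/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml
        └── model_latest.pth                       # downloaded weights (placeholder name)
```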
118 | -------------------------------------------------------------------------------- /cfgs/CIHP/e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml: -------------------------------------------------------------------------------- 1 | # bbox_AP: 65.8/92.8/73.7/3.4/46.8/68.7; 2 | # parsing: (mIoU:52.8/AP50:57.2/APvol:51.2/PCP50:55.4) 3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717] 4 | PIXEL_STDS: [1.0, 1.0, 1.0] 5 | CKPT: 'ckpts/CIHP/e2e_parsing_rcnn_R-50-FPN_1x_ms' 6 | MODEL: 7 | FPN_ON: True 8 | FASTER_ON: True 9 | PARSING_ON: True 10 | NUM_CLASSES: 2 11 | CONV1_RGB2BGR: False # caffe style 12 | BACKBONE: 13 | CONV_BODY: "resnet" 14 | RESNET: # caffe style 15 | LAYERS: (3, 4, 6, 3) 16 | RPN: 17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 18 | PRE_NMS_TOP_N_TRAIN: 2000 19 | PRE_NMS_TOP_N_TEST: 1000 20 | POST_NMS_TOP_N_TEST: 1000 21 | FPN_POST_NMS_TOP_N_TEST: 1000 22 | FAST_RCNN: 23 | ROI_XFORM_RESOLUTION: (7, 7) 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | PRCNN: 26 | ROI_PARSING_HEAD: "roi_gce_head" 27 | NUM_PARSING: 20 28 | ROI_STRIDES: [4] 29 | ROI_SIZE_PER_IMG: 16 30 | ROI_XFORM_RESOLUTION: (32, 32) 31 | ROI_XFORM_SAMPLING_RATIO: 2 32 | RESOLUTION: (128, 128) 33 | LOSS_WEIGHT: 2.0 # double loss weight 34 | GCE_HEAD: 35 | NUM_CONVS_AFTER_ASPPV3: 4 36 | USE_NL: True 37 | SOLVER: 38 | WEIGHT_DECAY: 0.0001 39 | BASE_LR: 0.02 40 | GAMMA: 0.1 41 | WARM_UP_ITERS: 500 42 | WARM_UP_FACTOR: 0.01 43 | MAX_ITER: 45000 44 | STEPS: [30000, 40000] 45 | SNAPSHOT_ITERS: 5000 46 | TRAIN: 47 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth 48 | DATASETS: ('CIHP_train', ) 49 | SCALES: (512, 640, 704, 768, 800, 864) 50 | MAX_SIZE: 1400 51 | TEST: 52 | DATASETS: ('CIHP_val',) 53 | SCALE: 800 54 | MAX_SIZE: 1333 55 | -------------------------------------------------------------------------------- /cfgs/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml: -------------------------------------------------------------------------------- 1 | # bbox_AP: 68.7/93.0/76.2/2.0/48.0/71.8; 2 | # parsing: (mIoU:56.0/AP50:64.1/APvol:54.1/PCP50:60.7) 3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717] 4 | PIXEL_STDS: [1.0, 1.0, 1.0] 5 | CKPT: 'ckpts/CIHP/e2e_parsing_rcnn_R-50-FPN_3x_ms' 6 | MODEL: 7 | FPN_ON: True 8 | FASTER_ON: True 9 | PARSING_ON: True 10 | NUM_CLASSES: 2 11 | CONV1_RGB2BGR: False # caffe style 12 | BACKBONE: 13 | CONV_BODY: "resnet" 14 | RESNET: # caffe style 15 | LAYERS: (3, 4, 6, 3) 16 | RPN: 17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 18 | PRE_NMS_TOP_N_TRAIN: 2000 19 | PRE_NMS_TOP_N_TEST: 1000 20 | POST_NMS_TOP_N_TEST: 1000 21 | FPN_POST_NMS_TOP_N_TEST: 1000 22 | FAST_RCNN: 23 | ROI_XFORM_RESOLUTION: (7, 7) 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | PRCNN: 26 | ROI_PARSING_HEAD: "roi_gce_head" 27 | NUM_PARSING: 20 28 | ROI_STRIDES: [4] 29 | ROI_SIZE_PER_IMG: 16 30 | ROI_XFORM_RESOLUTION: (32, 32) 31 | ROI_XFORM_SAMPLING_RATIO: 2 32 | RESOLUTION: (128, 128) 33 | LOSS_WEIGHT: 2.0 # double loss weight 34 | GCE_HEAD: 35 | NUM_CONVS_AFTER_ASPPV3: 4 36 | USE_NL: True 37 | SOLVER: 38 | WEIGHT_DECAY: 0.0001 39 | BASE_LR: 0.02 40 | GAMMA: 0.1 41 | WARM_UP_ITERS: 500 42 | WARM_UP_FACTOR: 0.01 43 | MAX_ITER: 135000 44 | STEPS: [105000, 125000] 45 | TRAIN: 46 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth 47 | DATASETS: ('CIHP_train', ) 48 | SCALES: (512, 640, 704, 768, 800, 864) 49 | MAX_SIZE: 1400 50 | TEST: 51 | DATASETS: ('CIHP_val',) 52 | SCALE: 800 53 | MAX_SIZE: 1333 54 | -------------------------------------------------------------------------------- /cfgs/DensePose_COCO/e2e_parsing_rcnn_R-50-FPN_s1x_ms.yaml: 
-------------------------------------------------------------------------------- 1 | # bbox_AP: 57.4/87.6/62.7/30.3/56.2/70.2; 2 | # uv_AP (GPSm): 59.3/90.5/68.7/52.6/60.8; 3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717] 4 | PIXEL_STDS: [1.0, 1.0, 1.0] 5 | CKPT: 'ckpts/DensePose_COCO/e2e_parsing_rcnn_R-50-FPN_s1x_ms' 6 | MODEL: 7 | FPN_ON: True 8 | FASTER_ON: True 9 | UV_ON: True 10 | NUM_CLASSES: 2 11 | CONV1_RGB2BGR: False # caffe style 12 | BACKBONE: 13 | CONV_BODY: "resnet" 14 | RESNET: # caffe style 15 | LAYERS: (3, 4, 6, 3) 16 | RPN: 17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 18 | PRE_NMS_TOP_N_TRAIN: 2000 19 | PRE_NMS_TOP_N_TEST: 1000 20 | POST_NMS_TOP_N_TEST: 1000 21 | FPN_POST_NMS_TOP_N_TEST: 1000 22 | FAST_RCNN: 23 | ROI_XFORM_RESOLUTION: (7, 7) 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | UVRCNN: 26 | ROI_UV_HEAD: "roi_gce_head" 27 | ROI_STRIDES: [4] 28 | ROI_SIZE_PER_IMG: 32 29 | ROI_XFORM_RESOLUTION: (32, 32) 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | RESOLUTION: (128, 128) 32 | INDEX_WEIGHTS : 2.0 33 | PART_WEIGHTS : 0.3 34 | POINT_REGRESSION_WEIGHTS : 0.1 35 | GCE_HEAD: 36 | NUM_CONVS_AFTER_ASPPV3: 4 37 | USE_NL: True 38 | SOLVER: 39 | WEIGHT_DECAY: 0.0001 40 | BASE_LR: 0.002 41 | GAMMA: 0.1 42 | WARM_UP_ITERS: 500 43 | MAX_ITER: 130000 44 | STEPS: [100000, 120000] 45 | TRAIN: 46 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth 47 | DATASETS: ('dense_coco_2017_train', ) 48 | SCALES: (512, 640, 704, 768, 800, 864) 49 | MAX_SIZE: 1400 50 | TEST: 51 | DATASETS: ('dense_coco_2017_val',) 52 | SCALE: 800 53 | MAX_SIZE: 1333 54 | -------------------------------------------------------------------------------- /cfgs/MHP-v2/e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml: -------------------------------------------------------------------------------- 1 | # bbox_AP: 66.5/93.8/76.8/-1.0/52.0/66.7; 2 | # parsing: (mIoU:34.0/AP50:19.9/APvol:37.6/PCP50:32.4) 3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717] 4 | PIXEL_STDS: [1.0, 1.0, 1.0] 5 | CKPT: 'ckpts/MHP-v2/e2e_parsing_rcnn_R-50-FPN_1x_ms' 6 | MODEL: 7 | FPN_ON: True 8 | FASTER_ON: True 9 | PARSING_ON: True 10 | NUM_CLASSES: 2 11 | CONV1_RGB2BGR: False # caffe style 12 | BACKBONE: 13 | CONV_BODY: "resnet" 14 | RESNET: # caffe style 15 | LAYERS: (3, 4, 6, 3) 16 | RPN: 17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 18 | PRE_NMS_TOP_N_TRAIN: 2000 19 | PRE_NMS_TOP_N_TEST: 1000 20 | POST_NMS_TOP_N_TEST: 1000 21 | FPN_POST_NMS_TOP_N_TEST: 1000 22 | FAST_RCNN: 23 | ROI_XFORM_RESOLUTION: (7, 7) 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | PRCNN: 26 | ROI_PARSING_HEAD: "roi_gce_head" 27 | NUM_PARSING: 59 28 | ROI_STRIDES: [4] 29 | ROI_SIZE_PER_IMG: 16 30 | ROI_XFORM_RESOLUTION: (32, 32) 31 | ROI_XFORM_SAMPLING_RATIO: 2 32 | RESOLUTION: (128, 128) 33 | SEMSEG_SCORE_THRESH: 0.05 34 | LOSS_WEIGHT: 2.0 # double loss weight 35 | GCE_HEAD: 36 | NUM_CONVS_AFTER_ASPPV3: 4 37 | USE_NL: True 38 | SOLVER: 39 | WEIGHT_DECAY: 0.0001 40 | BASE_LR: 0.02 41 | GAMMA: 0.1 42 | WARM_UP_ITERS: 500 43 | WARM_UP_FACTOR: 0.01 44 | MAX_ITER: 24000 45 | STEPS: [15000, 20000] 46 | SNAPSHOT_ITERS: 5000 47 | TRAIN: 48 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth 49 | DATASETS: ('MHP-v2_train', ) 50 | SCALES: (512, 640, 704, 768, 800, 864) 51 | MAX_SIZE: 1400 52 | TEST: 53 | DATASETS: ('MHP-v2_val',) 54 | SCALE: 800 55 | MAX_SIZE: 1333 56 | -------------------------------------------------------------------------------- /cfgs/MHP-v2/e2e_parsing_rcnn_R-50-FPN_3x_ms.yaml: -------------------------------------------------------------------------------- 1 | # bbox_AP: 
69.0/94.1/78.8/-1.0/56.7/69.1; 2 | # parsing: (mIoU:36.1/AP50:27.4/APvol:40.5/PCP50:38.3) 3 | PIXEL_MEANS: [102.9801, 115.9465, 122.7717] 4 | PIXEL_STDS: [1.0, 1.0, 1.0] 5 | CKPT: 'ckpts/MHP-v2/e2e_parsing_rcnn_R-50-FPN_3x_ms' 6 | MODEL: 7 | FPN_ON: True 8 | FASTER_ON: True 9 | PARSING_ON: True 10 | NUM_CLASSES: 2 11 | CONV1_RGB2BGR: False # caffe style 12 | BACKBONE: 13 | CONV_BODY: "resnet" 14 | RESNET: # caffe style 15 | LAYERS: (3, 4, 6, 3) 16 | RPN: 17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 18 | PRE_NMS_TOP_N_TRAIN: 2000 19 | PRE_NMS_TOP_N_TEST: 1000 20 | POST_NMS_TOP_N_TEST: 1000 21 | FPN_POST_NMS_TOP_N_TEST: 1000 22 | FAST_RCNN: 23 | ROI_XFORM_RESOLUTION: (7, 7) 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | PRCNN: 26 | ROI_PARSING_HEAD: "roi_gce_head" 27 | NUM_PARSING: 59 28 | ROI_STRIDES: [4] 29 | ROI_SIZE_PER_IMG: 16 30 | ROI_XFORM_RESOLUTION: (32, 32) 31 | ROI_XFORM_SAMPLING_RATIO: 2 32 | RESOLUTION: (128, 128) 33 | SEMSEG_SCORE_THRESH: 0.05 34 | LOSS_WEIGHT: 2.0 # double loss weight 35 | GCE_HEAD: 36 | NUM_CONVS_AFTER_ASPPV3: 4 37 | USE_NL: True 38 | SOLVER: 39 | WEIGHT_DECAY: 0.0001 40 | BASE_LR: 0.02 41 | GAMMA: 0.1 42 | WARM_UP_ITERS: 500 43 | WARM_UP_FACTOR: 0.01 44 | MAX_ITER: 72000 45 | STEPS: [54000, 64000] 46 | TRAIN: 47 | WEIGHTS: weights/pytorch-model/caffe-model/resnet50_caffe.pth 48 | DATASETS: ('MHP-v2_train', ) 49 | SCALES: (512, 640, 704, 768, 800, 864) 50 | MAX_SIZE: 1400 51 | TEST: 52 | DATASETS: ('MHP-v2_val',) 53 | SCALE: 800 54 | MAX_SIZE: 1333 55 | -------------------------------------------------------------------------------- /ckpts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/ckpts/README.md -------------------------------------------------------------------------------- /data/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/data/output.png -------------------------------------------------------------------------------- /data/parsing_rcnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/data/parsing_rcnn.png -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # export CXXFLAGS="-std=c++11" 4 | # export CFLAGS="-std=c99" 5 | 6 | PYTHON=${PYTHON:-"python"} 7 | cd models/ops 8 | 9 | echo "Building bbox op..." 10 | python setup_ssd.py build_ext --inplace 11 | rm -rf build 12 | 13 | echo "Building rcnn op..." 
14 | if [ -d "build" ]; then 15 | rm -r build 16 | fi 17 | $PYTHON setup_rcnn.py build_ext --inplace 18 | rm -r build 19 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /models/imagenet/__init__.py: -------------------------------------------------------------------------------- 1 | from .hrnet import * 2 | from .mobilenet_v1 import * 3 | from .mobilenet_v2 import * 4 | from .mobilenet_v3 import * 5 | from .resnet import * 6 | from .resnext import * 7 | from .vovnet import * 8 | -------------------------------------------------------------------------------- /models/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .batch_norm import FrozenBatchNorm2d, NaiveSyncBatchNorm 2 | from .misc import Conv2d, ConvTranspose2d, BatchNorm2d, interpolate 3 | from .nms import nms, ml_nms 4 | from .l2_loss import l2_loss 5 | from .iou_loss import IOULoss 6 | from .scale import Scale 7 | from .smooth_l1_loss import smooth_l1_loss, smooth_l1_loss_LW 8 | from .adjust_smooth_l1_loss import AdjustSmoothL1Loss 9 | from .sigmoid_focal_loss import SigmoidFocalLoss 10 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 11 | from .dcn.deform_conv_module import DeformConv, DeformConvPack, ModulatedDeformConv, ModulatedDeformConvPack 12 | from .dcn.deform_pool_func import deform_roi_pooling 13 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack 14 | from .affine import AffineChannel2d 15 | from .bilinear_interpolation2d import BilinearInterpolation2d 16 | from .conv2d_samepadding import Conv2dSamePadding 17 | from .conv2d_ws import Conv2dWS 18 | from .dropblock import DropBlock2D 19 | from .l2norm import L2Norm 20 | from .label_smoothing import LabelSmoothing 21 | from .nonlocal2d import NonLocal2d, MS_NonLocal2d 22 | from .squeeze_excitation import SeConv2d, GDWSe2d 23 | from .pool_points_interp import PoolPointsInterp 24 | from .context_block import GlobalContextBlock 25 | from .mixture_batchnorm import MixtureBatchNorm2d, MixtureGroupNorm 26 | from .lovasz_hinge_loss import LovaszHinge 27 | -------------------------------------------------------------------------------- /models/ops/adjust_smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class AdjustSmoothL1Loss(nn.Module): 6 | 7 | def __init__(self, num_features, momentum=0.1, beta=1. 
/9): 8 | super(AdjustSmoothL1Loss, self).__init__() 9 | self.num_features = num_features 10 | self.momentum = momentum 11 | self.beta = beta 12 | self.register_buffer( 13 | 'running_mean', torch.empty(num_features).fill_(beta) 14 | ) 15 | self.register_buffer('running_var', torch.zeros(num_features)) 16 | 17 | def forward(self, inputs, target, size_average=True): 18 | 19 | n = torch.abs(inputs -target) 20 | with torch.no_grad(): 21 | if torch.isnan(n.var(dim=0)).sum().item() == 0: 22 | self.running_mean = self.running_mean.to(n.device) 23 | self.running_mean *= (1 - self.momentum) 24 | self.running_mean += (self.momentum * n.mean(dim=0)) 25 | self.running_var = self.running_var.to(n.device) 26 | self.running_var *= (1 - self.momentum) 27 | self.running_var += (self.momentum * n.var(dim=0)) 28 | 29 | 30 | beta = (self.running_mean - self.running_var) 31 | beta = beta.clamp(max=self.beta, min=1e-3) 32 | 33 | beta = beta.view(-1, self.num_features).to(n.device) 34 | cond = n < beta.expand_as(n) 35 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 36 | if size_average: 37 | return loss.mean() 38 | return loss.sum() 39 | 40 | -------------------------------------------------------------------------------- /models/ops/affine.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class AffineChannel2d(nn.Module): 6 | """ A simple channel-wise affine transformation operation """ 7 | def __init__(self, num_features): 8 | super().__init__() 9 | self.num_features = num_features 10 | self.weight = nn.Parameter(torch.Tensor(num_features)) 11 | self.bias = nn.Parameter(torch.Tensor(num_features)) 12 | self.weight.data.uniform_() 13 | self.bias.data.zero_() 14 | 15 | def forward(self, x): 16 | return x * self.weight.view(1, self.num_features, 1, 1) + \ 17 | self.bias.view(1, self.num_features, 1, 1) 18 | -------------------------------------------------------------------------------- /models/ops/batch_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | from torch import nn 4 | from torch.autograd.function import Function 5 | 6 | from utils.misc import get_world_size 7 | 8 | 9 | class FrozenBatchNorm2d(nn.Module): 10 | """ 11 | BatchNorm2d where the batch statistics and the affine parameters 12 | are fixed 13 | """ 14 | 15 | def __init__(self, n): 16 | super(FrozenBatchNorm2d, self).__init__() 17 | self.register_buffer("weight", torch.ones(n)) 18 | self.register_buffer("bias", torch.zeros(n)) 19 | self.register_buffer("running_mean", torch.zeros(n)) 20 | self.register_buffer("running_var", torch.ones(n)) 21 | 22 | def forward(self, x): 23 | # Cast all fixed parameters to half() if necessary 24 | if x.dtype == torch.float16: 25 | self.weight = self.weight.half() 26 | self.bias = self.bias.half() 27 | self.running_mean = self.running_mean.half() 28 | self.running_var = self.running_var.half() 29 | 30 | scale = self.weight * self.running_var.rsqrt() 31 | bias = self.bias - self.running_mean * scale 32 | scale = scale.reshape(1, -1, 1, 1) 33 | bias = bias.reshape(1, -1, 1, 1) 34 | return x * scale + bias 35 | 36 | 37 | class AllReduce(Function): 38 | @staticmethod 39 | def forward(ctx, input): 40 | input_list = [torch.zeros_like(input) for k in range(dist.get_world_size())] 41 | # Use allgather instead of allreduce since I don't trust in-place operations .. 
42 | dist.all_gather(input_list, input, async_op=False) 43 | inputs = torch.stack(input_list, dim=0) 44 | return torch.sum(inputs, dim=0) 45 | 46 | @staticmethod 47 | def backward(ctx, grad_output): 48 | dist.all_reduce(grad_output, async_op=False) 49 | return grad_output 50 | 51 | 52 | class NaiveSyncBatchNorm(nn.BatchNorm2d): 53 | """ 54 | This function is taken from the detectron2 repo. 55 | It can be seen here: 56 | https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/batch_norm.py 57 | 58 | `torch.nn.SyncBatchNorm` has known unknown bugs. 59 | It produces significantly worse AP (and sometimes goes NaN) 60 | when the batch size on each worker is quite different 61 | (e.g., when scale augmentation is used, or when it is applied to mask head). 62 | Use this implementation before `nn.SyncBatchNorm` is fixed. 63 | It is slower than `nn.SyncBatchNorm`. 64 | """ 65 | 66 | def forward(self, input): 67 | if get_world_size() == 1 or not self.training: 68 | return super().forward(input) 69 | 70 | assert input.shape[0] > 0, "SyncBatchNorm does not support empty inputs" 71 | C = input.shape[1] 72 | mean = torch.mean(input, dim=[0, 2, 3]) 73 | meansqr = torch.mean(input * input, dim=[0, 2, 3]) 74 | 75 | vec = torch.cat([mean, meansqr], dim=0) 76 | vec = AllReduce.apply(vec) * (1.0 / dist.get_world_size()) 77 | 78 | mean, meansqr = torch.split(vec, C) 79 | var = meansqr - mean * mean 80 | self.running_mean += self.momentum * (mean.detach() - self.running_mean) 81 | self.running_var += self.momentum * (var.detach() - self.running_var) 82 | 83 | invstd = torch.rsqrt(var + self.eps) 84 | scale = self.weight * invstd 85 | bias = self.bias - mean * scale 86 | scale = scale.reshape(1, -1, 1, 1) 87 | bias = bias.reshape(1, -1, 1, 1) 88 | return input * scale + bias 89 | -------------------------------------------------------------------------------- /models/ops/bilinear_interpolation2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class BilinearInterpolation2d(nn.Module): 9 | """Bilinear interpolation in space of scale. 10 | 11 | Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s:= up_scale 12 | 13 | Adapted from the CVPR'15 FCN code. 
14 | See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py 15 | """ 16 | def __init__(self, in_channels, out_channels, up_scale): 17 | super().__init__() 18 | assert in_channels == out_channels 19 | assert up_scale % 2 == 0, 'Scale should be even' 20 | self.in_channes = in_channels 21 | self.out_channels = out_channels 22 | self.up_scale = int(up_scale) 23 | self.padding = up_scale // 2 24 | 25 | def upsample_filt(size): 26 | factor = (size + 1) // 2 27 | if size % 2 == 1: 28 | center = factor - 1 29 | else: 30 | center = factor - 0.5 31 | og = np.ogrid[:size, :size] 32 | return ((1 - abs(og[0] - center) / factor) * 33 | (1 - abs(og[1] - center) / factor)) 34 | 35 | kernel_size = up_scale * 2 36 | bil_filt = upsample_filt(kernel_size) 37 | 38 | kernel = np.zeros( 39 | (in_channels, out_channels, kernel_size, kernel_size), dtype=np.float32 40 | ) 41 | kernel[range(in_channels), range(out_channels), :, :] = bil_filt 42 | 43 | self.upconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, 44 | stride=self.up_scale, padding=self.padding) 45 | 46 | self.upconv.weight.data.copy_(torch.from_numpy(kernel)) 47 | self.upconv.bias.data.fill_(0) 48 | self.upconv.weight.requires_grad = False 49 | self.upconv.bias.requires_grad = False 50 | 51 | def forward(self, x): 52 | return self.upconv(x) 53 | -------------------------------------------------------------------------------- /models/ops/context_block.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creates a GCB Model as defined in: 3 | Yue Cao, Jiarui Xu, Stephen Lin, Fangyun Wei, Han Hu. (2019 Arxiv). 4 | GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond. 5 | Copyright (c) Yang Lu, 2019 6 | """ 7 | import torch 8 | from torch import nn 9 | 10 | 11 | def constant_init(module, val, bias=0): 12 | nn.init.constant_(module.weight, val) 13 | if hasattr(module, 'bias') and module.bias is not None: 14 | nn.init.constant_(module.bias, bias) 15 | 16 | 17 | def last_zero_init(m): 18 | if isinstance(m, nn.Sequential): 19 | constant_init(m[-1], val=0) 20 | else: 21 | constant_init(m, val=0) 22 | 23 | 24 | class GlobalContextBlock(nn.Module): 25 | def __init__(self, inplanes, innerplanse, pooling_type='att', fusion_types=('channel_add', )): 26 | super(GlobalContextBlock, self).__init__() 27 | assert pooling_type in ['avg', 'att'] 28 | assert isinstance(fusion_types, (list, tuple)) 29 | valid_fusion_types = ['channel_add', 'channel_mul'] 30 | assert all([f in valid_fusion_types for f in fusion_types]) 31 | assert len(fusion_types) > 0, 'at least one fusion should be used' 32 | self.inplanes = inplanes 33 | self.innerplanse = innerplanse 34 | self.pooling_type = pooling_type 35 | self.fusion_types = fusion_types 36 | if pooling_type == 'att': 37 | self.conv_mask = nn.Conv2d(inplanes, 1, kernel_size=1) 38 | self.softmax = nn.Softmax(dim=2) 39 | else: 40 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 41 | if 'channel_add' in fusion_types: 42 | self.channel_add_conv = nn.Sequential( 43 | nn.Conv2d(self.inplanes, self.innerplanse, kernel_size=1), 44 | nn.LayerNorm([self.innerplanse, 1, 1]), 45 | nn.ReLU(inplace=True), 46 | nn.Conv2d(self.innerplanse, self.inplanes, kernel_size=1) 47 | ) 48 | else: 49 | self.channel_add_conv = None 50 | if 'channel_mul' in fusion_types: 51 | self.channel_mul_conv = nn.Sequential( 52 | nn.Conv2d(self.inplanes, self.innerplanse, kernel_size=1), 53 | nn.LayerNorm([self.innerplanse, 1, 1]), 54 | nn.ReLU(inplace=True), 55 | 
nn.Conv2d(self.innerplanse, self.inplanes, kernel_size=1) 56 | ) 57 | else: 58 | self.channel_mul_conv = None 59 | self.reset_parameters() 60 | 61 | def reset_parameters(self): 62 | if self.pooling_type == 'att': 63 | nn.init.kaiming_normal_(self.conv_mask.weight, mode='fan_in', nonlinearity='relu') 64 | self.conv_mask.inited = True 65 | 66 | if self.channel_add_conv is not None: 67 | last_zero_init(self.channel_add_conv) 68 | if self.channel_mul_conv is not None: 69 | last_zero_init(self.channel_mul_conv) 70 | 71 | def spatial_pool(self, x): 72 | batch, channel, height, width = x.size() 73 | if self.pooling_type == 'att': 74 | input_x = x 75 | # [N, C, H * W] 76 | input_x = input_x.view(batch, channel, height * width) 77 | # [N, 1, C, H * W] 78 | input_x = input_x.unsqueeze(1) 79 | # [N, 1, H, W] 80 | context_mask = self.conv_mask(x) 81 | # [N, 1, H * W] 82 | context_mask = context_mask.view(batch, 1, height * width) 83 | # [N, 1, H * W] 84 | context_mask = self.softmax(context_mask) 85 | # [N, 1, H * W, 1] 86 | context_mask = context_mask.unsqueeze(-1) 87 | # [N, 1, C, 1] 88 | context = torch.matmul(input_x, context_mask) 89 | # [N, C, 1, 1] 90 | context = context.view(batch, channel, 1, 1) 91 | else: 92 | # [N, C, 1, 1] 93 | context = self.avg_pool(x) 94 | 95 | return context 96 | 97 | def forward(self, x): 98 | # [N, C, 1, 1] 99 | context = self.spatial_pool(x) 100 | 101 | out = x 102 | if self.channel_mul_conv is not None: 103 | # [N, C, 1, 1] 104 | channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) 105 | out = out * channel_mul_term 106 | if self.channel_add_conv is not None: 107 | # [N, C, 1, 1] 108 | channel_add_term = self.channel_add_conv(context) 109 | out = out + channel_add_term 110 | 111 | return out 112 | -------------------------------------------------------------------------------- /models/ops/conv2d_samepadding.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | 7 | class Conv2dSamePadding(nn.Conv2d): 8 | """ 2D Convolutions like TensorFlow """ 9 | 10 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): 11 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 12 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 13 | 14 | def forward(self, x): 15 | ih, iw = x.size()[-2:] 16 | kh, kw = self.weight.size()[-2:] 17 | sh, sw = self.stride 18 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 19 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 20 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 21 | if pad_h > 0 or pad_w > 0: 22 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) 23 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 24 | -------------------------------------------------------------------------------- /models/ops/conv2d_ws.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | 5 | class Conv2dWS(nn.Conv2d): 6 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 7 | padding=0, dilation=1, groups=1, bias=True): 8 | super(Conv2dWS, self).__init__(in_channels, out_channels, kernel_size, stride, 9 | padding, dilation, groups, bias) 10 | 11 | def 
forward(self, x): 12 | # return super(Conv2d, self).forward(x) 13 | weight = self.weight 14 | weight_mean = weight.mean(dim=1, keepdim=True).mean(dim=2, keepdim=True).mean(dim=3, keepdim=True) 15 | weight = weight - weight_mean 16 | std = weight.view(weight.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5 17 | weight = weight / std.expand_as(weight) 18 | return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 19 | -------------------------------------------------------------------------------- /models/ops/csrc/PoolPointsInterp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef WITH_CUDA 4 | #include "cuda/vision.h" 5 | #endif 6 | 7 | // Interface for Python 8 | at::Tensor PoolPointsInterp_forward(const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale) { 11 | if (input.type().is_cuda()) { 12 | #ifdef WITH_CUDA 13 | return PoolPointsInterp_forward_cuda(input, rois, spatial_scale); 14 | #else 15 | AT_ERROR("Not compiled with GPU support"); 16 | #endif 17 | } 18 | } 19 | 20 | at::Tensor PoolPointsInterp_backward(const at::Tensor& grad, 21 | const at::Tensor& rois, 22 | const float spatial_scale, 23 | const int batch_size, 24 | const int channels, 25 | const int height, 26 | const int width) { 27 | if (grad.type().is_cuda()) { 28 | #ifdef WITH_CUDA 29 | return PoolPointsInterp_backward_cuda(grad, rois, spatial_scale, batch_size, channels, height, width); 30 | #else 31 | AT_ERROR("Not compiled with GPU support"); 32 | #endif 33 | } 34 | AT_ERROR("Not implemented on the CPU"); 35 | } 36 | 37 | -------------------------------------------------------------------------------- /models/ops/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio, 17 | bool aligned) { 18 | if (input.type().is_cuda()) { 19 | #ifdef WITH_CUDA 20 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned); 26 | } 27 | 28 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 29 | const at::Tensor& rois, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width, 37 | const int sampling_ratio, 38 | bool aligned) { 39 | if (grad.type().is_cuda()) { 40 | #ifdef WITH_CUDA 41 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio, aligned); 42 | #else 43 | AT_ERROR("Not compiled with GPU support"); 44 | #endif 45 | } 46 | AT_ERROR("Not implemented on the CPU"); 47 | } 48 | 49 | -------------------------------------------------------------------------------- /models/ops/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /models/ops/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef 
WITH_CUDA 18 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /models/ops/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /models/ops/csrc/cpu/vision.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio, 12 | bool aligned); 13 | 14 | 15 | at::Tensor nms_cpu(const at::Tensor& dets, 16 | const at::Tensor& scores, 17 | const float threshold); 18 | -------------------------------------------------------------------------------- /models/ops/csrc/cuda/deform_pool_cuda.cu: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | void DeformablePSROIPoolForward( 20 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 21 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 22 | const int height, const int width, const int num_bbox, 23 | const int channels_trans, const int no_trans, const float spatial_scale, 24 | const int output_dim, const int group_size, const int pooled_size, 25 | const int part_size, const int sample_per_part, const float trans_std); 26 | 27 | void DeformablePSROIPoolBackwardAcc( 28 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 29 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 30 | at::Tensor trans_grad, const int batch, const int channels, 31 | const int height, const int width, const int num_bbox, 32 | const int channels_trans, const int no_trans, const float spatial_scale, 33 | const int output_dim, const int group_size, const int pooled_size, 34 | const int part_size, const int sample_per_part, const float trans_std); 35 | 36 | void deform_psroi_pooling_cuda_forward( 37 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 38 | at::Tensor top_count, const int no_trans, const float spatial_scale, 39 | const int output_dim, const int group_size, const int pooled_size, 40 | const int part_size, const int sample_per_part, const float trans_std) 41 | { 42 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 43 | 44 | const int batch = input.size(0); 45 | const int channels = input.size(1); 46 | const int height = input.size(2); 47 | const int width = input.size(3); 48 | const int channels_trans = no_trans ? 
2 : trans.size(1); 49 | 50 | const int num_bbox = bbox.size(0); 51 | if (num_bbox != out.size(0)) 52 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 53 | out.size(0), num_bbox); 54 | 55 | DeformablePSROIPoolForward( 56 | input, bbox, trans, out, top_count, batch, channels, height, width, 57 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 58 | pooled_size, part_size, sample_per_part, trans_std); 59 | } 60 | 61 | void deform_psroi_pooling_cuda_backward( 62 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 63 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 64 | const int no_trans, const float spatial_scale, const int output_dim, 65 | const int group_size, const int pooled_size, const int part_size, 66 | const int sample_per_part, const float trans_std) 67 | { 68 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 69 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 70 | 71 | const int batch = input.size(0); 72 | const int channels = input.size(1); 73 | const int height = input.size(2); 74 | const int width = input.size(3); 75 | const int channels_trans = no_trans ? 2 : trans.size(1); 76 | 77 | const int num_bbox = bbox.size(0); 78 | if (num_bbox != out_grad.size(0)) 79 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 80 | out_grad.size(0), num_bbox); 81 | 82 | DeformablePSROIPoolBackwardAcc( 83 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 84 | channels, height, width, num_bbox, channels_trans, no_trans, 85 | spatial_scale, output_dim, group_size, pooled_size, part_size, 86 | sample_per_part, trans_std); 87 | } 88 | -------------------------------------------------------------------------------- /models/ops/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | const int sample_per_part, 56 | const float trans_std) 57 | { 58 | if (input.type().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | -------------------------------------------------------------------------------- /models/ops/csrc/ml_nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor ml_nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const at::Tensor& labels, 13 | const float threshold) { 14 | 15 | if (dets.type().is_cuda()) { 16 | #ifdef WITH_CUDA 17 | // TODO raise error if not compiled with CUDA 18 | if (dets.numel() == 0) 19 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 20 | auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1); 21 | return ml_nms_cuda(b, threshold); 22 | #else 23 | AT_ERROR("Not compiled with GPU support"); 24 | #endif 25 | } 26 | AT_ERROR("CPU version not implemented"); 27 | } 28 | -------------------------------------------------------------------------------- /models/ops/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /models/ops/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | #include "nms.h" 2 | #include "ml_nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "SigmoidFocalLoss.h" 6 | #include "deform_conv.h" 7 | #include "deform_pool.h" 8 | #include "PoolPointsInterp.h" 9 | 10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 11 | m.def("nms", &nms, "non-maximum suppression"); 12 | m.def("ml_nms", &ml_nms, "multi-label non-maximum suppression"); 13 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 14 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 15 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 16 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 17 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 18 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 19 | m.def("pool_points_interp_forward", &PoolPointsInterp_forward, "PoolPointsInterp_forward"); 20 | m.def("pool_points_interp_backward", &PoolPointsInterp_backward, "PoolPointsInterp_backward"); 21 | // dcn-v2 22 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 23 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 24 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 25 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 26 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 27 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 28 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 29 | } 30 | -------------------------------------------------------------------------------- /models/ops/cython_bbox.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_overlaps( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ua = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) + 70 | box_area - iw * ih 71 | ) 72 | overlaps[n, k] = iw * ih / ua 73 | return overlaps 74 | -------------------------------------------------------------------------------- /models/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn) 3 | # -------------------------------------------------------------------------------- /models/ops/dcn/deform_pool_func.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from models.ops import _C 6 | 7 | 8 | class DeformRoIPoolingFunction(Function): 9 | 10 | @staticmethod 11 | def forward( 12 | ctx, 13 | data, 14 | rois, 15 | offset, 16 | spatial_scale, 17 | out_size, 18 | out_channels, 19 | no_trans, 20 | group_size=1, 21 | part_size=None, 22 | sample_per_part=4, 23 | trans_std=.0 24 | ): 25 | ctx.spatial_scale = spatial_scale 26 | ctx.out_size = out_size 27 | ctx.out_channels = out_channels 28 | ctx.no_trans = no_trans 29 | ctx.group_size = group_size 30 | ctx.part_size = out_size if part_size is None else part_size 31 | ctx.sample_per_part = sample_per_part 32 | ctx.trans_std = trans_std 33 | 34 | assert 0.0 <= ctx.trans_std <= 1.0 35 | if not data.is_cuda: 36 | raise NotImplementedError 37 | 38 | n = rois.shape[0] 39 | output = data.new_empty(n, out_channels, out_size, out_size) 40 | output_count = data.new_empty(n, out_channels, out_size, out_size) 41 | _C.deform_psroi_pooling_forward( 42 | data, 43 | rois, 44 | offset, 45 | output, 46 | output_count, 47 | ctx.no_trans, 48 | 
ctx.spatial_scale, 49 | ctx.out_channels, 50 | ctx.group_size, 51 | ctx.out_size, 52 | ctx.part_size, 53 | ctx.sample_per_part, 54 | ctx.trans_std 55 | ) 56 | 57 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 58 | ctx.save_for_backward(data, rois, offset) 59 | ctx.output_count = output_count 60 | 61 | return output 62 | 63 | @staticmethod 64 | @once_differentiable 65 | def backward(ctx, grad_output): 66 | if not grad_output.is_cuda: 67 | raise NotImplementedError 68 | 69 | data, rois, offset = ctx.saved_tensors 70 | output_count = ctx.output_count 71 | grad_input = torch.zeros_like(data) 72 | grad_rois = None 73 | grad_offset = torch.zeros_like(offset) 74 | 75 | _C.deform_psroi_pooling_backward( 76 | grad_output, 77 | data, 78 | rois, 79 | offset, 80 | output_count, 81 | grad_input, 82 | grad_offset, 83 | ctx.no_trans, 84 | ctx.spatial_scale, 85 | ctx.out_channels, 86 | ctx.group_size, 87 | ctx.out_size, 88 | ctx.part_size, 89 | ctx.sample_per_part, 90 | ctx.trans_std 91 | ) 92 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None) 93 | 94 | 95 | deform_roi_pooling = DeformRoIPoolingFunction.apply 96 | -------------------------------------------------------------------------------- /models/ops/dropblock.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | 5 | 6 | class DropBlock2D(nn.Module): 7 | r"""Randomly zeroes spatial blocks of the input tensor. 8 | As described in the paper 9 | `DropBlock: A regularization method for convolutional networks`_ , 10 | dropping whole blocks of feature map allows to remove semantic 11 | information as compared to regular dropout. 12 | Args: 13 | keep_prob (float, optional): probability of an element to be kept. 14 | Authors recommend to linearly decrease this value from 1 to desired 15 | value. 16 | block_size (int, optional): size of the block. Block size in paper 17 | usually equals last feature map dimensions. 18 | Shape: 19 | - Input: :math:`(N, C, H, W)` 20 | - Output: :math:`(N, C, H, W)` (same shape as input) 21 | .. _DropBlock: A regularization method for convolutional networks: 22 | https://arxiv.org/abs/1810.12890 23 | """ 24 | 25 | def __init__(self, keep_prob=0.9, block_size=7): 26 | super(DropBlock2D, self).__init__() 27 | self.keep_prob = keep_prob 28 | self.block_size = block_size 29 | 30 | def forward(self, input): 31 | if not self.training or self.keep_prob == 1: 32 | return input 33 | gamma = (1. - self.keep_prob) / self.block_size ** 2 34 | for sh in input.shape[2:]: 35 | gamma *= sh / (sh - self.block_size + 1) 36 | M = torch.bernoulli(torch.ones_like(input) * gamma) 37 | Msum = F.conv2d(M, 38 | torch.ones((input.shape[1], 1, self.block_size, self.block_size)).to(device=input.device, 39 | dtype=input.dtype), 40 | padding=self.block_size // 2, 41 | groups=input.shape[1]) 42 | torch.set_printoptions(threshold=5000) 43 | mask = (Msum < 1).to(device=input.device, dtype=input.dtype) 44 | return input * mask * mask.numel() /mask.sum() #TODO input * mask * self.keep_prob ? 
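# A minimal usage sketch (not part of the original file; shapes are illustrative only).
# DropBlock2D is an identity in eval mode or when keep_prob == 1, so it can stay in the
# model for inference.
#
#   drop = DropBlock2D(keep_prob=0.9, block_size=7)
#   drop.train()
#   feat = torch.randn(2, 256, 56, 56)   # hypothetical NCHW feature map
#   out = drop(feat)                     # same shape; contiguous blocks zeroed and rescaled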
45 | 46 | -------------------------------------------------------------------------------- /models/ops/iou_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class IOULoss(nn.Module): 6 | def __init__(self, loc_loss_type): 7 | super(IOULoss, self).__init__() 8 | self.loc_loss_type = loc_loss_type 9 | 10 | def forward(self, pred, target, weight=None): 11 | pred_left = pred[:, 0] 12 | pred_top = pred[:, 1] 13 | pred_right = pred[:, 2] 14 | pred_bottom = pred[:, 3] 15 | 16 | target_left = target[:, 0] 17 | target_top = target[:, 1] 18 | target_right = target[:, 2] 19 | target_bottom = target[:, 3] 20 | 21 | target_area = (target_left + target_right) * (target_top + target_bottom) 22 | pred_area = (pred_left + pred_right) * (pred_top + pred_bottom) 23 | 24 | w_intersect = torch.min(pred_left, target_left) + torch.min(pred_right, target_right) 25 | h_intersect = torch.min(pred_bottom, target_bottom) + torch.min(pred_top, target_top) 26 | g_w_intersect = torch.max(pred_left, target_left) + torch.max(pred_right, target_right) 27 | g_h_intersect = torch.max(pred_bottom, target_bottom) + torch.max(pred_top, target_top) 28 | 29 | area_intersect = w_intersect * h_intersect 30 | area_union = target_area + pred_area - area_intersect 31 | ac_uion = g_w_intersect * g_h_intersect + 1e-7 32 | 33 | ious = (area_intersect + 1.0) / (area_union + 1.0) 34 | gious = ious - (ac_uion - area_union) / ac_uion 35 | 36 | if self.loc_loss_type == 'iou': 37 | losses = -torch.log(ious) 38 | elif self.loc_loss_type == 'liou': 39 | losses = 1 - ious 40 | elif self.loc_loss_type == 'giou': 41 | losses = 1 - gious 42 | else: 43 | raise NotImplementedError 44 | 45 | if weight is not None and weight.sum() > 0: 46 | return (losses * weight).sum() 47 | else: 48 | assert losses.numel() != 0 49 | return losses.sum() 50 | -------------------------------------------------------------------------------- /models/ops/l2_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def l2_loss(input, target): 5 | """ 6 | very similar to the smooth_l1_loss from pytorch, but with 7 | the extra beta parameter 8 | """ 9 | pos_inds = torch.nonzero(target > 0.0).squeeze(1) 10 | if pos_inds.shape[0] > 0: 11 | cond = torch.abs(input[pos_inds] - target[pos_inds]) 12 | loss = 0.5 * cond ** 2 / pos_inds.shape[0] 13 | else: 14 | loss = input * 0.0 15 | return loss.sum() 16 | -------------------------------------------------------------------------------- /models/ops/l2norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.init as init 4 | 5 | 6 | class L2Norm(nn.Module): 7 | def __init__(self, n_channels, scale): 8 | super(L2Norm, self).__init__() 9 | self.n_channels = n_channels 10 | self.gamma = scale or None 11 | self.eps = 1e-10 12 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 13 | self.reset_parameters() 14 | 15 | def reset_parameters(self): 16 | init.constant_(self.weight, self.gamma) 17 | 18 | def forward(self, x): 19 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps 20 | x = x / norm 21 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x 22 | return out 23 | -------------------------------------------------------------------------------- /models/ops/label_smoothing.py: -------------------------------------------------------------------------------- 
1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class LabelSmoothing(nn.Module): 6 | """ 7 | NLL loss with label smoothing. 8 | """ 9 | 10 | def __init__(self, smoothing=0.0): 11 | """ 12 | Constructor for the LabelSmoothing module. 13 | :param smoothing: label smoothing factor 14 | """ 15 | super(LabelSmoothing, self).__init__() 16 | self.confidence = 1.0 - smoothing 17 | self.smoothing = smoothing 18 | 19 | def forward(self, x, target): 20 | logprobs = torch.nn.functional.log_softmax(x, dim=-1) 21 | 22 | nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) 23 | nll_loss = nll_loss.squeeze(1) 24 | smooth_loss = -logprobs.mean(dim=-1) 25 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss 26 | return loss.mean() 27 | -------------------------------------------------------------------------------- /models/ops/lovasz_hinge_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from torch.autograd import Variable 5 | import torch.nn.functional as F 6 | 7 | 8 | def lovasz_grad(gt_sorted): 9 | """ 10 | Computes gradient of the Lovasz extension w.r.t sorted errors 11 | See Alg. 1 in paper 12 | """ 13 | p = len(gt_sorted) 14 | gts = gt_sorted.sum() 15 | intersection = gts - gt_sorted.float().cumsum(0) 16 | union = gts + (1 - gt_sorted).float().cumsum(0) 17 | jaccard = 1. - intersection / union 18 | if p > 1: # cover 1-pixel case 19 | jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] 20 | return jaccard 21 | 22 | 23 | class LovaszHinge(nn.Module): 24 | def __init__(self, reduction='mean'): 25 | super(LovaszHinge, self).__init__() 26 | self.reduction = reduction 27 | 28 | def flatten(self, input, target, mask=None): 29 | if mask is None: 30 | input_flatten = input.view(-1) 31 | target_flatten = target.view(-1) 32 | else: 33 | input_flatten = input[mask].view(-1) 34 | target_flatten = target[mask].view(-1) 35 | return input_flatten, target_flatten 36 | 37 | def lovasz_hinge_flat(self, logits, labels): 38 | """ 39 | Binary Lovasz hinge loss 40 | logits: [P] Variable, logits at each prediction (between -\infty and +\infty) 41 | labels: [P] Tensor, binary ground truth labels (0 or 1) 42 | ignore: label to ignore 43 | """ 44 | if len(labels) == 0: 45 | # only void pixels, the gradients should be 0 46 | return logits.sum() * 0. 47 | signs = 2. * labels.float() - 1. 48 | errors = (1. 
- logits * Variable(signs)) 49 | errors_sorted, perm = torch.sort(errors, dim=0, descending=True) 50 | perm = perm.data 51 | gt_sorted = labels[perm] 52 | grad = lovasz_grad(gt_sorted) 53 | loss = torch.dot(F.relu(errors_sorted), Variable(grad)) 54 | return loss 55 | 56 | def forward(self, inputs, targets, mask=None, act=False): 57 | losses = [] 58 | for id in range(len(inputs)): 59 | if mask is not None: 60 | input_flatten, target_flatten = self.flatten(inputs[id], targets[id], mask[id]) 61 | else: 62 | input_flatten, target_flatten = self.flatten(inputs[id], targets[id]) 63 | if act: 64 | # map [0, 1] to [-inf, inf] 65 | input_flatten = torch.log(input_flatten) - torch.log(1 - input_flatten) 66 | losses.append(self.lovasz_hinge_flat(input_flatten, target_flatten)) 67 | losses = torch.stack(losses).to(device=inputs.device) 68 | if self.reduction == "mean": 69 | losses = losses.mean() 70 | elif self.reduction == "sum": 71 | losses = losses.sum() 72 | 73 | return losses 74 | -------------------------------------------------------------------------------- /models/ops/misc.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch import nn 4 | from torch.nn.modules.utils import _ntuple 5 | 6 | 7 | class _NewEmptyTensorOp(torch.autograd.Function): 8 | @staticmethod 9 | def forward(ctx, x, new_shape): 10 | ctx.shape = x.shape 11 | return x.new_empty(new_shape) 12 | 13 | @staticmethod 14 | def backward(ctx, grad): 15 | shape = ctx.shape 16 | return _NewEmptyTensorOp.apply(grad, shape), None 17 | 18 | 19 | class Conv2d(torch.nn.Conv2d): 20 | def forward(self, x): 21 | if x.numel() > 0: 22 | return super(Conv2d, self).forward(x) 23 | # get output shape 24 | 25 | output_shape = [ 26 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 27 | for i, p, di, k, d in zip( 28 | x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride 29 | ) 30 | ] 31 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape 32 | return _NewEmptyTensorOp.apply(x, output_shape) 33 | 34 | 35 | class ConvTranspose2d(torch.nn.ConvTranspose2d): 36 | def forward(self, x): 37 | if x.numel() > 0: 38 | return super(ConvTranspose2d, self).forward(x) 39 | # get output shape 40 | 41 | output_shape = [ 42 | (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op 43 | for i, p, di, k, d, op in zip( 44 | x.shape[-2:], 45 | self.padding, 46 | self.dilation, 47 | self.kernel_size, 48 | self.stride, 49 | self.output_padding, 50 | ) 51 | ] 52 | output_shape = [x.shape[0], self.bias.shape[0]] + output_shape 53 | return _NewEmptyTensorOp.apply(x, output_shape) 54 | 55 | 56 | class BatchNorm2d(torch.nn.BatchNorm2d): 57 | def forward(self, x): 58 | if x.numel() > 0: 59 | return super(BatchNorm2d, self).forward(x) 60 | # get output shape 61 | output_shape = x.shape 62 | return _NewEmptyTensorOp.apply(x, output_shape) 63 | 64 | 65 | def interpolate(x, size=None, scale_factor=None, mode="nearest", align_corners=None): 66 | if x.numel() > 0: 67 | return torch.nn.functional.interpolate( 68 | x, size, scale_factor, mode, align_corners 69 | ) 70 | 71 | def _check_size_scale_factor(dim): 72 | if size is None and scale_factor is None: 73 | raise ValueError("either size or scale_factor should be defined") 74 | if size is not None and scale_factor is not None: 75 | raise ValueError("only one of size or scale_factor should be defined") 76 | if ( 77 | scale_factor is not None 78 | and isinstance(scale_factor, tuple) 79 | and len(scale_factor) != dim 80 | ): 81 | raise 
ValueError( 82 | "scale_factor shape must match input shape. " 83 | "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) 84 | ) 85 | 86 | def _output_size(dim): 87 | _check_size_scale_factor(dim) 88 | if size is not None: 89 | return size 90 | scale_factors = _ntuple(dim)(scale_factor) 91 | # math.floor might return float in py2.7 92 | return [ 93 | int(math.floor(x.size(i + 2) * scale_factors[i])) for i in range(dim) 94 | ] 95 | 96 | output_shape = tuple(_output_size(2)) 97 | output_shape = x.shape[:-2] + output_shape 98 | return _NewEmptyTensorOp.apply(x, output_shape) 99 | -------------------------------------------------------------------------------- /models/ops/mixture_batchnorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class H_Sigmoid(nn.Module): 7 | def forward(self, x): 8 | out = F.relu6(x + 3, inplace=True) / 6 9 | return out 10 | 11 | 12 | def make_norm(c, norm='bn', group=1, eps=1e-5): 13 | if norm == 'bn': 14 | return nn.BatchNorm2d(c, eps=eps) 15 | elif norm == 'gn': 16 | assert c % group == 0 17 | return nn.GroupNorm(group, c, eps=eps) 18 | elif norm == 'none': 19 | return None 20 | else: 21 | return nn.BatchNorm2d(c, eps=eps) 22 | 23 | 24 | class AttentionWeights(nn.Module): 25 | expansion = 2 26 | 27 | def __init__(self, num_channels, k, norm=None, groups=1, use_hsig=True): 28 | super(AttentionWeights, self).__init__() 29 | # num_channels *= 2 30 | self.k = k 31 | self.avgpool = nn.AdaptiveAvgPool2d(1) 32 | self.attention = nn.Sequential( 33 | nn.Conv2d(num_channels, k, 1, bias=False), 34 | make_norm(k, norm, groups), 35 | H_Sigmoid() if use_hsig else nn.Sigmoid() 36 | ) 37 | 38 | def forward(self, x): 39 | b, c, _, _ = x.size() 40 | y = self.avgpool(x) # .view(b, c) 41 | var = torch.var(x, dim=(2, 3)).view(b, c, 1, 1) 42 | y *= (var + 1e-3).rsqrt() 43 | # y = torch.cat((y, var), dim=1) 44 | return self.attention(y).view(b, self.k) 45 | 46 | 47 | # TODO: keep it to use FP32 always, need to figure out how to set it using apex ? 48 | class MixtureBatchNorm2d(nn.BatchNorm2d): 49 | def __init__(self, num_channels, k, eps=1e-5, momentum=0.1, track_running_stats=True): 50 | super(MixtureBatchNorm2d, self).__init__( 51 | num_channels, eps=eps, momentum=momentum, affine=False, track_running_stats=track_running_stats) 52 | self.k = k 53 | self.weight_ = nn.Parameter(torch.Tensor(k, num_channels)) 54 | self.bias_ = nn.Parameter(torch.Tensor(k, num_channels)) 55 | 56 | self.attention_weights = AttentionWeights(num_channels, k, norm='bn') 57 | 58 | self._init_params() 59 | 60 | def _init_params(self): 61 | nn.init.normal_(self.weight_, 1, 0.1) 62 | nn.init.normal_(self.bias_, 0, 0.1) 63 | 64 | def forward(self, x): 65 | output = super(MixtureBatchNorm2d, self).forward(x) 66 | size = output.size() 67 | y = self.attention_weights(x) # bxk # or use output as attention input 68 | 69 | weight = y @ self.weight_ # bxc 70 | bias = y @ self.bias_ # bxc 71 | weight = weight.unsqueeze(-1).unsqueeze(-1).expand(size) 72 | bias = bias.unsqueeze(-1).unsqueeze(-1).expand(size) 73 | 74 | return weight * output + bias 75 | 76 | 77 | # Modified on top of nn.GroupNorm 78 | # TODO: keep it to use FP32 always, need to figure out how to set it using apex ? 
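# MixtureGroupNorm mirrors MixtureBatchNorm2d above: it applies group normalization without
# affine parameters, then mixes k learned (weight_, bias_) pairs per sample via AttentionWeights.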
79 | class MixtureGroupNorm(nn.Module): 80 | __constants__ = ['num_groups', 'num_channels', 'k', 'eps', 'weight', 'bias'] 81 | 82 | def __init__(self, num_channels, num_groups, k, eps=1e-5): 83 | super(MixtureGroupNorm, self).__init__() 84 | self.num_groups = num_groups 85 | self.num_channels = num_channels 86 | self.k = k 87 | self.eps = eps 88 | self.affine = True 89 | self.weight_ = nn.Parameter(torch.Tensor(k, num_channels)) 90 | self.bias_ = nn.Parameter(torch.Tensor(k, num_channels)) 91 | self.register_parameter('weight', None) 92 | self.register_parameter('bias', None) 93 | 94 | self.attention_weights = AttentionWeights(num_channels, k, norm='gn', groups=1) 95 | 96 | self.reset_parameters() 97 | 98 | def reset_parameters(self): 99 | nn.init.normal_(self.weight_, 1, 0.1) 100 | nn.init.normal_(self.bias_, 0, 0.1) 101 | 102 | def forward(self, x): 103 | output = F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps) 104 | size = output.size() 105 | 106 | y = self.attention_weights(x) # TODO: use output as attention input 107 | 108 | weight = y @ self.weight_ 109 | bias = y @ self.bias_ 110 | 111 | weight = weight.unsqueeze(-1).unsqueeze(-1).expand(size) 112 | bias = bias.unsqueeze(-1).unsqueeze(-1).expand(size) 113 | 114 | return weight * output + bias 115 | 116 | def extra_repr(self): 117 | return '{num_groups}, {num_channels}, eps={eps}, ' \ 118 | 'affine={affine}'.format(**self.__dict__) 119 | -------------------------------------------------------------------------------- /models/ops/nms.py: -------------------------------------------------------------------------------- 1 | from models.ops import _C 2 | 3 | from apex import amp 4 | 5 | # Only valid with fp32 inputs - give AMP the hint 6 | nms = amp.float_function(_C.nms) 7 | ml_nms = amp.float_function(_C.ml_nms) 8 | 9 | # nms.__doc__ = """ 10 | # This function performs Non-maximum suppresion""" 11 | -------------------------------------------------------------------------------- /models/ops/pool_points_interp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from models.ops import _C 7 | 8 | from apex import amp 9 | 10 | 11 | class _PoolPointsInterp(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, spatial_scale): 14 | ctx.save_for_backward(roi) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output = _C.pool_points_interp_forward( 18 | input, roi, spatial_scale) 19 | return output 20 | 21 | @staticmethod 22 | @once_differentiable 23 | def backward(ctx, grad_output): 24 | rois, = ctx.saved_tensors 25 | spatial_scale = ctx.spatial_scale 26 | bs, ch, h, w = ctx.input_shape 27 | grad_input = _C.pool_points_interp_backward( 28 | grad_output, 29 | rois, 30 | spatial_scale, 31 | bs, 32 | ch, 33 | h, 34 | w, 35 | ) 36 | return grad_input, None, None 37 | 38 | 39 | pool_points_interp = _PoolPointsInterp.apply 40 | 41 | 42 | class PoolPointsInterp(nn.Module): 43 | def __init__(self, spatial_scale=1.0): 44 | super(PoolPointsInterp, self).__init__() 45 | self.spatial_scale = spatial_scale 46 | 47 | @amp.float_function 48 | def forward(self, input, rois): 49 | return pool_points_interp(input, rois, self.spatial_scale) 50 | 51 | def __repr__(self): 52 | tmpstr = self.__class__.__name__ + "(" 53 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 54 | tmpstr += ")" 55 | return tmpstr 56 | 
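# Usage sketch (tensor shapes and the rois layout are assumptions, not taken from this file):
# PoolPointsInterp wraps the _C.pool_points_interp_* kernels so that interpolating features
# at sampled points stays differentiable w.r.t. the input feature map.
#
#   ppi = PoolPointsInterp(spatial_scale=0.25)   # feature map at 1/4 input resolution
#   feats = torch.randn(2, 256, 100, 136)        # hypothetical NCHW FPN level
#   point_feats = ppi(feats, rois)               # rois encode the sampling points in the
#                                                # format expected by the CUDA kernel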
-------------------------------------------------------------------------------- /models/ops/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class Scale(nn.Module): 6 | def __init__(self, init_value=1.0): 7 | super(Scale, self).__init__() 8 | self.scale = nn.Parameter(torch.FloatTensor([init_value])) 9 | 10 | def forward(self, input): 11 | return input * self.scale 12 | -------------------------------------------------------------------------------- /models/ops/setup_rcnn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import glob 4 | import os 5 | 6 | import torch 7 | from setuptools import find_packages 8 | from setuptools import setup 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | from torch.utils.cpp_extension import CppExtension 11 | from torch.utils.cpp_extension import CUDAExtension 12 | 13 | requirements = ["torch", "torchvision"] 14 | 15 | 16 | def get_extensions(): 17 | this_dir = os.path.dirname(os.path.abspath(__file__)) 18 | extensions_dir = os.path.join(this_dir, "csrc") 19 | 20 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 21 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 22 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 23 | 24 | sources = main_file + source_cpu 25 | extension = CppExtension 26 | 27 | extra_compile_args = {"cxx": []} 28 | define_macros = [] 29 | 30 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 31 | extension = CUDAExtension 32 | sources += source_cuda 33 | define_macros += [("WITH_CUDA", None)] 34 | extra_compile_args["nvcc"] = [ 35 | "-DCUDA_HAS_FP16=1", 36 | "-D__CUDA_NO_HALF_OPERATORS__", 37 | "-D__CUDA_NO_HALF_CONVERSIONS__", 38 | "-D__CUDA_NO_HALF2_OPERATORS__", 39 | ] 40 | 41 | sources = [os.path.join(extensions_dir, s) for s in sources] 42 | 43 | include_dirs = [extensions_dir] 44 | 45 | ext_modules = [ 46 | extension( 47 | "_C", 48 | sources, 49 | include_dirs=include_dirs, 50 | define_macros=define_macros, 51 | extra_compile_args=extra_compile_args, 52 | ) 53 | ] 54 | 55 | return ext_modules 56 | 57 | 58 | setup( 59 | name="pet", 60 | ext_modules=get_extensions(), 61 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 62 | ) 63 | -------------------------------------------------------------------------------- /models/ops/setup_ssd.py: -------------------------------------------------------------------------------- 1 | from Cython.Build import cythonize 2 | from Cython.Distutils import build_ext 3 | from setuptools import Extension 4 | from setuptools import setup 5 | 6 | import numpy as np 7 | 8 | 9 | # Obtain the numpy include directory. This logic works across numpy versions. 
10 | try: 11 | numpy_include = np.get_include() 12 | except AttributeError: 13 | numpy_include = np.get_numpy_include() 14 | 15 | 16 | ext_modules = [ 17 | Extension( 18 | name='cython_bbox', 19 | sources=['cython_bbox.pyx'], 20 | extra_compile_args=['-Wno-cpp'], 21 | include_dirs=[numpy_include] 22 | ), 23 | Extension( 24 | name='cython_nms', 25 | sources=['cython_nms.pyx'], 26 | extra_compile_args=['-Wno-cpp'], 27 | include_dirs=[numpy_include] 28 | ) 29 | ] 30 | 31 | setup( 32 | name='pet', 33 | ext_modules=cythonize(ext_modules) 34 | ) 35 | -------------------------------------------------------------------------------- /models/ops/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from models.ops import _C 7 | 8 | 9 | # TODO: Use JIT to replace CUDA implementation in the future. 10 | class _SigmoidFocalLoss(Function): 11 | @staticmethod 12 | def forward(ctx, logits, targets, gamma, alpha): 13 | ctx.save_for_backward(logits, targets) 14 | num_classes = logits.shape[1] 15 | ctx.num_classes = num_classes 16 | ctx.gamma = gamma 17 | ctx.alpha = alpha 18 | 19 | losses = _C.sigmoid_focalloss_forward( 20 | logits, targets, num_classes, gamma, alpha 21 | ) 22 | return losses 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, d_loss): 27 | logits, targets = ctx.saved_tensors 28 | num_classes = ctx.num_classes 29 | gamma = ctx.gamma 30 | alpha = ctx.alpha 31 | d_loss = d_loss.contiguous() 32 | d_logits = _C.sigmoid_focalloss_backward( 33 | logits, targets, d_loss, num_classes, gamma, alpha 34 | ) 35 | return d_logits, None, None, None, None 36 | 37 | 38 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 39 | 40 | 41 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha): 42 | num_classes = logits.shape[1] 43 | gamma = gamma[0] 44 | alpha = alpha[0] 45 | dtype = targets.dtype 46 | device = targets.device 47 | class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0) 48 | 49 | t = targets.unsqueeze(1) 50 | p = torch.sigmoid(logits) 51 | term1 = (1 - p) ** gamma * torch.log(p) 52 | term2 = p ** gamma * torch.log(1 - p) 53 | return -(t == class_range).float() * term1 * alpha - ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 54 | 55 | 56 | class SigmoidFocalLoss(nn.Module): 57 | def __init__(self, gamma, alpha): 58 | super(SigmoidFocalLoss, self).__init__() 59 | self.gamma = gamma 60 | self.alpha = alpha 61 | 62 | def forward(self, logits, targets): 63 | device = logits.device 64 | if logits.is_cuda: 65 | loss_func = sigmoid_focal_loss_cuda 66 | else: 67 | loss_func = sigmoid_focal_loss_cpu 68 | 69 | loss = loss_func(logits, targets, self.gamma, self.alpha) 70 | return loss.sum() 71 | 72 | def __repr__(self): 73 | tmpstr = self.__class__.__name__ + "(" 74 | tmpstr += "gamma=" + str(self.gamma) 75 | tmpstr += ", alpha=" + str(self.alpha) 76 | tmpstr += ")" 77 | return tmpstr 78 | -------------------------------------------------------------------------------- /models/ops/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def smooth_l1_loss(input, target, beta=1. 
/ 9, size_average=True): 5 | """ 6 | very similar to the smooth_l1_loss from pytorch, but with 7 | the extra beta parameter 8 | 9 | Modified according to detectron2's fvcore, 10 | refer to https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py 11 | """ 12 | if beta < 1e-5: 13 | # if beta == 0, then torch.where will result in nan gradients when 14 | # the chain rule is applied due to pytorch implementation details 15 | # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of 16 | # zeros, rather than "no gradient"). To avoid this issue, we define 17 | # small values of beta to be exactly l1 loss. 18 | loss = torch.abs(input - target) 19 | else: 20 | n = torch.abs(input - target) 21 | cond = n < beta 22 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 23 | 24 | if size_average: 25 | return loss.mean() 26 | return loss.sum() 27 | 28 | 29 | def smooth_l1_loss_LW(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, beta=1.0): 30 | """ 31 | SmoothL1(x) = 0.5 * x^2 / beta if |x| < beta 32 | |x| - 0.5 * beta otherwise. 33 | 1 / N * sum_i alpha_out[i] * SmoothL1(alpha_in[i] * (y_hat[i] - y[i])). 34 | N is the number of batch elements in the input predictions 35 | """ 36 | box_diff = bbox_pred - bbox_targets 37 | in_box_diff = bbox_inside_weights * box_diff 38 | abs_in_box_diff = torch.abs(in_box_diff) 39 | smoothL1_sign = (abs_in_box_diff < beta).detach().float() 40 | in_loss_box = smoothL1_sign * 0.5 * torch.pow(in_box_diff, 2) / beta + \ 41 | (1 - smoothL1_sign) * (abs_in_box_diff - (0.5 * beta)) 42 | out_loss_box = bbox_outside_weights * in_loss_box 43 | loss_box = out_loss_box 44 | N = loss_box.size(0) # batch size 45 | loss_box = loss_box.view(-1).sum(0) / N 46 | return loss_box -------------------------------------------------------------------------------- /models/ops/squeeze_excitation.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class SeConv2d(nn.Module): 5 | def __init__(self, inplanes, innerplanse, activation=nn.ReLU): 6 | super(SeConv2d, self).__init__() 7 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 8 | self.conv = nn.Sequential( 9 | nn.Conv2d(inplanes, innerplanse, kernel_size=1), 10 | activation(), 11 | nn.Conv2d(innerplanse, inplanes, kernel_size=1), 12 | nn.Sigmoid() 13 | ) 14 | self.reset_parameters() 15 | 16 | def reset_parameters(self): 17 | # weight initialization 18 | for m in self.modules(): 19 | if isinstance(m, nn.Conv2d): 20 | nn.init.constant_(m.weight, 0) 21 | if m.bias is not None: 22 | nn.init.zeros_(m.bias) 23 | 24 | def forward(self, x): 25 | n, c, _, _ = x.size() 26 | y = self.avg_pool(x) 27 | y = self.conv(y) 28 | return x * y 29 | 30 | 31 | class GDWSe2d(nn.Module): 32 | def __init__(self, inplanes, kernel=3, reduction=16, with_padding=False): 33 | super(GDWSe2d, self).__init__() 34 | if with_padding: 35 | padding = kernel // 2 36 | else: 37 | padding = 0 38 | 39 | self.globle_dw = nn.Conv2d(inplanes, inplanes, kernel_size=kernel, padding=padding, stride=1, 40 | groups=inplanes, bias=False) 41 | self.bn = nn.BatchNorm2d(inplanes) 42 | self.relu = nn.ReLU(inplace=True) 43 | 44 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 45 | self.fc = nn.Sequential( 46 | nn.Linear(inplanes, inplanes // reduction), 47 | nn.ReLU(inplace=True), 48 | nn.Linear(inplanes // reduction, inplanes), 49 | nn.Sigmoid() 50 | ) 51 | 52 | self._init_weights() 53 | 54 | def _init_weights(self): 55 | # weight initialization 56 | for m in self.modules(): 57 | if 
isinstance(m, nn.Conv2d): 58 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 59 | if m.bias is not None: 60 | nn.init.zeros_(m.bias) 61 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 62 | nn.init.constant_(m.weight, 1) 63 | nn.init.constant_(m.bias, 0) 64 | elif isinstance(m, nn.Linear): 65 | nn.init.normal_(m.weight, 0, 0.01) 66 | nn.init.constant_(m.bias, 0) 67 | 68 | def forward(self, x): 69 | y = self.globle_dw(x) 70 | y = self.bn(y) 71 | y = self.relu(y) 72 | 73 | n, c, _, _ = x.size() 74 | y = self.avg_pool(y).view(n, c) 75 | y = self.fc(y).view(n, c, 1, 1) 76 | return x * y.expand_as(x) 77 | -------------------------------------------------------------------------------- /rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/core/__init__.py -------------------------------------------------------------------------------- /rcnn/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .transform import build_transforms 2 | from .dataset import build_dataset 3 | from .dataset import make_train_data_loader 4 | from .dataset import make_test_data_loader 5 | from .evaluation import evaluation 6 | from .evaluation import post_processing -------------------------------------------------------------------------------- /rcnn/datasets/dataset_catalog.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from utils.data.dataset_catalog import COMMON_DATASETS 4 | 5 | # Root directory of project 6 | ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..')) 7 | 8 | # Path to data dir 9 | _DATA_DIR = osp.abspath(osp.join(ROOT_DIR, 'data')) 10 | 11 | # Required dataset entry keys 12 | _IM_DIR = 'image_directory' 13 | _ANN_FN = 'annotation_file' 14 | 15 | # Available datasets 16 | _DATASETS = { 17 | } 18 | _DATASETS.update(COMMON_DATASETS) 19 | 20 | 21 | def datasets(): 22 | """Retrieve the list of available dataset names.""" 23 | return _DATASETS.keys() 24 | 25 | 26 | def contains(name): 27 | """Determine if the dataset is in the catalog.""" 28 | return name in _DATASETS.keys() 29 | 30 | 31 | def get_im_dir(name): 32 | """Retrieve the image directory for the dataset.""" 33 | return _DATASETS[name][_IM_DIR] 34 | 35 | 36 | def get_ann_fn(name): 37 | """Retrieve the annotation file for the dataset.""" 38 | return _DATASETS[name][_ANN_FN] 39 | -------------------------------------------------------------------------------- /rcnn/datasets/transform.py: -------------------------------------------------------------------------------- 1 | from utils.data import transforms as T 2 | 3 | from rcnn.core.config import cfg 4 | 5 | 6 | def build_transforms(is_train=True): 7 | if is_train: 8 | min_size = cfg.TRAIN.SCALES 9 | max_size = cfg.TRAIN.MAX_SIZE 10 | flip_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 11 | brightness = cfg.TRAIN.BRIGHTNESS 12 | contrast = cfg.TRAIN.CONTRAST 13 | saturation = cfg.TRAIN.SATURATION 14 | hue = cfg.TRAIN.HUE 15 | left_right = cfg.TRAIN.LEFT_RIGHT 16 | 17 | # for force resize 18 | force_test_scale = [-1, -1] 
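# [-1, -1] appears to act as the 'disabled' placeholder during training;
# the test branch below reads cfg.TEST.FORCE_TEST_SCALE instead.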
19 | scale_ratios = cfg.TRAIN.RANDOM_CROP.SCALE_RATIOS 20 | 21 | # for random crop 22 | preprocess_type = cfg.TRAIN.PREPROCESS_TYPE 23 | 24 | crop_sizes = cfg.TRAIN.RANDOM_CROP.CROP_SCALES 25 | crop_iou_ths = cfg.TRAIN.RANDOM_CROP.IOU_THS 26 | pad_pixel = cfg.TRAIN.RANDOM_CROP.PAD_PIXEL 27 | pad_pixel = (cfg.PIXEL_MEANS if len(pad_pixel) < 3 else pad_pixel) 28 | else: 29 | min_size = cfg.TEST.SCALE 30 | max_size = cfg.TEST.MAX_SIZE 31 | flip_prob = 0 32 | brightness = 0.0 33 | contrast = 0.0 34 | saturation = 0.0 35 | hue = 0.0 36 | left_right = () 37 | 38 | # for force resize 39 | force_test_scale = cfg.TEST.FORCE_TEST_SCALE 40 | scale_ratios = () 41 | 42 | # for random crop 43 | preprocess_type = "none" 44 | 45 | crop_sizes = () 46 | pad_pixel = () 47 | crop_iou_ths = () 48 | 49 | to_bgr255 = cfg.TO_BGR255 50 | normalize_transform = T.Normalize( 51 | mean=cfg.PIXEL_MEANS, std=cfg.PIXEL_STDS, to_bgr255=to_bgr255 52 | ) 53 | 54 | color_jitter = T.ColorJitter( 55 | brightness=brightness, 56 | contrast=contrast, 57 | saturation=saturation, 58 | hue=hue, 59 | ) 60 | 61 | transform = T.Compose( 62 | [ 63 | color_jitter, 64 | T.Resize(min_size, max_size, preprocess_type, scale_ratios, force_test_scale), 65 | T.RandomCrop(preprocess_type, crop_sizes, pad_pixel, crop_iou_ths), 66 | T.RandomHorizontalFlip(flip_prob, left_right), 67 | T.ToTensor(), 68 | normalize_transform, 69 | ] 70 | ) 71 | return transform 72 | -------------------------------------------------------------------------------- /rcnn/modeling/backbone/MobileNet_v2.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | 5 | import models.imagenet.mobilenet_v2 as mv2 6 | import models.ops as ops 7 | from models.imagenet.utils import make_divisible 8 | from utils.net import freeze_params, make_norm 9 | from rcnn.modeling import registry 10 | from rcnn.core.config import cfg 11 | 12 | 13 | class MobileNetV2(mv2.MobileNetV2): 14 | def __init__(self, norm='bn', activation=nn.ReLU6, stride=32): 15 | """ Constructor 16 | """ 17 | super(MobileNetV2, self).__init__() 18 | block = mv2.LinearBottleneck 19 | self.use_se = cfg.BACKBONE.MV2.USE_SE 20 | self.widen_factor = cfg.BACKBONE.MV2.WIDEN_FACTOR 21 | self.norm = norm 22 | self.activation_type = activation 23 | try: 24 | self.activation = activation(inplace=True) 25 | except: 26 | self.activation = activation() 27 | self.stride = stride 28 | 29 | layers_cfg = mv2.model_se(mv2.MV2_CFG['A']) if self.use_se else mv2.MV2_CFG['A'] 30 | num_of_channels = [lc[-1][1] for lc in layers_cfg[1:-1]] 31 | self.channels = [make_divisible(ch * self.widen_factor, 8) for ch in num_of_channels] 32 | self.layers = [len(lc) for lc in layers_cfg[2:-1]] 33 | 34 | self.inplanes = make_divisible(layers_cfg[0][0][1] * self.widen_factor, 8) 35 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=layers_cfg[0][0][0], stride=layers_cfg[0][0][4], 36 | padding=layers_cfg[0][0][0] // 2, bias=False) 37 | self.bn1 = make_norm(self.inplanes, norm=self.norm) 38 | 39 | self.layer0 = self._make_layer(block, layers_cfg[1], dilation=1) 40 | self.layer1 = self._make_layer(block, layers_cfg[2], dilation=1) 41 | self.layer2 = self._make_layer(block, layers_cfg[3], dilation=1) 42 | self.layer3 = self._make_layer(block, layers_cfg[4], dilation=1) 43 | self.layer4 = self._make_layer(block, layers_cfg[5], dilation=1) 44 | 45 | self.spatial_scale = [1 / 4., 1 / 8., 1 / 16., 1 / 32.] 
46 | self.dim_out = self.stage_out_dim[1:int(math.log(self.stride, 2))] 47 | 48 | del self.conv_out 49 | del self.bn_out 50 | del self.avgpool 51 | del self.fc 52 | self._init_weights() 53 | self._init_modules() 54 | 55 | def _init_modules(self): 56 | assert cfg.BACKBONE.MV2.FREEZE_AT in [0, 2, 3, 4, 5] # cfg.BACKBONE.MV2.FREEZE_AT: 2 57 | assert cfg.BACKBONE.MV2.FREEZE_AT <= len(self.layers) + 1 58 | if cfg.BACKBONE.MV2.FREEZE_AT > 0: 59 | freeze_params(getattr(self, 'conv1')) 60 | freeze_params(getattr(self, 'bn1')) 61 | for i in range(0, cfg.BACKBONE.MV2.FREEZE_AT): 62 | freeze_params(getattr(self, 'layer%d' % i)) 63 | # Freeze all bn (affine) layers !!! 64 | self.apply(lambda m: freeze_params(m) if isinstance(m, ops.AffineChannel2d) else None) 65 | 66 | def train(self, mode=True): 67 | # Override train mode 68 | self.training = mode 69 | if cfg.BACKBONE.MV2.FREEZE_AT < 1: 70 | getattr(self, 'conv1').train(mode) 71 | getattr(self, 'bn1').train(mode) 72 | for i in range(cfg.BACKBONE.MV2.FREEZE_AT, len(self.layers) + 1): 73 | getattr(self, 'layer%d' % i).train(mode) 74 | 75 | def forward(self, x): 76 | x = self.conv1(x) 77 | x = self.bn1(x) 78 | x = self.activation(x) 79 | 80 | x = self.layer0(x) 81 | x2 = self.layer1(x) 82 | x3 = self.layer2(x2) 83 | x4 = self.layer3(x3) 84 | x5 = self.layer4(x4) 85 | 86 | return [x2, x3, x4, x5] 87 | 88 | 89 | # ---------------------------------------------------------------------------- # 90 | # MobileNetV2 Conv Body 91 | # ---------------------------------------------------------------------------- # 92 | @registry.BACKBONES.register("mobilenet_v2") 93 | def mobilenet_v2(): 94 | model = MobileNetV2() 95 | return model 96 | -------------------------------------------------------------------------------- /rcnn/modeling/backbone/MobileNet_v3.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | import models.imagenet.mobilenet_v3 as mv3 7 | import models.ops as ops 8 | from models.imagenet.utils import make_divisible, convert_conv2convsamepadding_model 9 | from utils.net import freeze_params, make_norm 10 | from rcnn.modeling import registry 11 | from rcnn.core.config import cfg 12 | 13 | 14 | class MobileNetV3(mv3.MobileNetV3): 15 | def __init__(self, norm='bn', activation=mv3.H_Swish, stride=32): 16 | """ Constructor 17 | """ 18 | super(MobileNetV3, self).__init__() 19 | block = mv3.LinearBottleneck 20 | self.widen_factor = cfg.BACKBONE.MV3.WIDEN_FACTOR 21 | self.norm = norm 22 | self.se_reduce_mid = cfg.BACKBONE.MV3.SE_REDUCE_MID 23 | self.se_divisible = cfg.BACKBONE.MV3.SE_DIVISIBLE 24 | self.head_use_bias = cfg.BACKBONE.MV3.HEAD_USE_BIAS 25 | self.force_residual = cfg.BACKBONE.MV3.FORCE_RESIDUAL 26 | self.sync_se_act = cfg.BACKBONE.MV3.SYNC_SE_ACT 27 | self.bn_eps = cfg.BACKBONE.BN_EPS 28 | self.activation_type = activation 29 | self.stride = stride 30 | 31 | setting = cfg.BACKBONE.MV3.SETTING 32 | layers_cfg = mv3.MV3_CFG[setting] 33 | num_of_channels = [lc[-1][1] for lc in layers_cfg[1:-1]] 34 | self.channels = [make_divisible(ch * self.widen_factor, 8) for ch in num_of_channels] 35 | self.activation = activation() if layers_cfg[0][0][3] else nn.ReLU(inplace=True) 36 | self.layers = [len(lc) for lc in layers_cfg[2:-1]] 37 | 38 | self.inplanes = make_divisible(layers_cfg[0][0][1] * self.widen_factor, 8) 39 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=layers_cfg[0][0][0], stride=layers_cfg[0][0][4], 40 | 
padding=layers_cfg[0][0][0] // 2, bias=False) 41 | self.bn1 = make_norm(self.inplanes, norm=self.norm, eps=self.bn_eps) 42 | 43 | self.layer0 = self._make_layer(block, layers_cfg[1], dilation=1) if layers_cfg[1][0][0] else None 44 | self.layer1 = self._make_layer(block, layers_cfg[2], dilation=1) 45 | self.layer2 = self._make_layer(block, layers_cfg[3], dilation=1) 46 | self.layer3 = self._make_layer(block, layers_cfg[4], dilation=1) 47 | self.layer4 = self._make_layer(block, layers_cfg[5], dilation=1) 48 | 49 | self.spatial_scale = [1 / 4., 1 / 8., 1 / 16., 1 / 32.] 50 | self.dim_out = self.stage_out_dim[1:int(math.log(self.stride, 2))] 51 | 52 | del self.last_stage 53 | del self.avgpool 54 | del self.conv_out 55 | del self.fc 56 | self._init_weights() 57 | self._init_modules() 58 | 59 | def _init_modules(self): 60 | assert cfg.BACKBONE.MV3.FREEZE_AT in [0, 2, 3, 4, 5] # cfg.BACKBONE.MV3.FREEZE_AT: 2 61 | assert cfg.BACKBONE.MV3.FREEZE_AT <= len(self.layers) + 1 62 | if cfg.BACKBONE.MV3.FREEZE_AT > 0: 63 | freeze_params(getattr(self, 'conv1')) 64 | freeze_params(getattr(self, 'bn1')) 65 | for i in range(0, cfg.BACKBONE.MV3.FREEZE_AT): 66 | if i == 0: 67 | freeze_params(getattr(self, 'layer0')) if self.layer0 is not None else None 68 | else: 69 | freeze_params(getattr(self, 'layer%d' % i)) 70 | # Freeze all bn (affine) layers !!! 71 | self.apply(lambda m: freeze_params(m) if isinstance(m, ops.AffineChannel2d) else None) 72 | 73 | def train(self, mode=True): 74 | # Override train mode 75 | self.training = mode 76 | if cfg.BACKBONE.MV3.FREEZE_AT < 1: 77 | getattr(self, 'conv1').train(mode) 78 | getattr(self, 'bn1').train(mode) 79 | for i in range(cfg.BACKBONE.MV3.FREEZE_AT, len(self.layers) + 1): 80 | if i == 0: 81 | getattr(self, 'layer0').train(mode) if self.layer0 is not None else None 82 | else: 83 | getattr(self, 'layer%d' % i).train(mode) 84 | 85 | def forward(self, x): 86 | x = self.conv1(x) 87 | x = self.bn1(x) 88 | x = self.activation(x) 89 | 90 | if self.layer0 is not None: 91 | x = self.layer0(x) 92 | x2 = self.layer1(x) 93 | x3 = self.layer2(x2) 94 | x4 = self.layer3(x3) 95 | x5 = self.layer4(x4) 96 | 97 | return [x2, x3, x4, x5] 98 | 99 | 100 | # ---------------------------------------------------------------------------- # 101 | # MobileNet V3 Conv Body 102 | # ---------------------------------------------------------------------------- # 103 | @registry.BACKBONES.register("mobilenet_v3") 104 | def mobilenet_v3(): 105 | model = MobileNetV3() 106 | if cfg.BACKBONE.MV3.SAME_PAD: 107 | model = convert_conv2convsamepadding_model(model) 108 | return model 109 | -------------------------------------------------------------------------------- /rcnn/modeling/backbone/VoVNet.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | 5 | import models.imagenet.vovnet as vov 6 | import models.ops as ops 7 | from utils.net import freeze_params, make_norm 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | def get_norm(): 13 | norm = 'bn' 14 | if cfg.BACKBONE.VOV.USE_GN: 15 | norm = 'gn' 16 | return norm 17 | 18 | 19 | class VoVNet(vov.VoVNet): 20 | def __init__(self, norm='bn', stride=32): 21 | """ Constructor 22 | """ 23 | super(VoVNet, self).__init__() 24 | block = vov.OSABlock 25 | self.num_conv = cfg.BACKBONE.VOV.NUM_CONV # 5 26 | self.norm = norm 27 | self.stride = stride 28 | 29 | base_width = cfg.BACKBONE.VOV.WIDTH # 64 30 | stage_dims = cfg.BACKBONE.VOV.STAGE_DIMS 31 | 
concat_dims = cfg.BACKBONE.VOV.CONCAT_DIMS 32 | layers = cfg.BACKBONE.VOV.LAYERS 33 | self.layers = layers 34 | stage_with_conv = cfg.BACKBONE.VOV.STAGE_WITH_CONV 35 | self.channels = [base_width] + list(concat_dims) 36 | 37 | self.inplanes = base_width 38 | self.conv1 = nn.Conv2d(3, self.inplanes, 3, 2, 1, bias=False) 39 | self.bn1 = make_norm(self.inplanes, norm=self.norm) 40 | self.conv2 = nn.Conv2d(self.inplanes, self.inplanes, 3, 1, 1, bias=False) 41 | self.bn2 = make_norm(self.inplanes, norm=self.norm) 42 | self.conv3 = nn.Conv2d(self.inplanes, self.inplanes * 2, 3, 2, 1, bias=False) 43 | self.bn3 = make_norm(self.inplanes * 2, norm=self.norm) 44 | self.relu = nn.ReLU(inplace=True) 45 | self.inplanes = self.inplanes * 2 46 | 47 | self.layer1 = self._make_layer(block, stage_dims[0], concat_dims[0], layers[0], 1, conv=stage_with_conv[0]) 48 | self.layer2 = self._make_layer(block, stage_dims[1], concat_dims[1], layers[1], 2, conv=stage_with_conv[1]) 49 | self.layer3 = self._make_layer(block, stage_dims[2], concat_dims[2], layers[2], 2, conv=stage_with_conv[2]) 50 | self.layer4 = self._make_layer(block, stage_dims[3], concat_dims[3], layers[3], 2, conv=stage_with_conv[3]) 51 | 52 | self.spatial_scale = [1 / 4., 1 / 8., 1 / 16., 1 / 32.] 53 | self.dim_out = self.stage_out_dim[1:int(math.log(self.stride, 2))] 54 | 55 | del self.avgpool 56 | del self.fc 57 | self._init_weights() 58 | self._init_modules() 59 | 60 | def _init_modules(self): 61 | assert cfg.BACKBONE.VOV.FREEZE_AT in [0, 2, 3, 4, 5] # cfg.BACKBONE.VOV.FREEZE_AT: 2 62 | assert cfg.BACKBONE.VOV.FREEZE_AT <= len(self.layers) + 1 63 | if cfg.BACKBONE.VOV.FREEZE_AT > 0: 64 | freeze_params(getattr(self, 'conv1')) 65 | freeze_params(getattr(self, 'bn1')) 66 | freeze_params(getattr(self, 'conv2')) 67 | freeze_params(getattr(self, 'bn2')) 68 | freeze_params(getattr(self, 'conv3')) 69 | freeze_params(getattr(self, 'bn3')) 70 | for i in range(1, cfg.BACKBONE.VOV.FREEZE_AT): 71 | freeze_params(getattr(self, 'layer%d' % i)) 72 | # Freeze all bn (affine) layers !!! 
73 | self.apply(lambda m: freeze_params(m) if isinstance(m, ops.AffineChannel2d) else None) 74 | 75 | def train(self, mode=True): 76 | # Override train mode 77 | self.training = mode 78 | if cfg.BACKBONE.VOV.FREEZE_AT < 1: 79 | getattr(self, 'conv1').train(mode) 80 | getattr(self, 'bn1').train(mode) 81 | getattr(self, 'conv2').train(mode) 82 | getattr(self, 'bn2').train(mode) 83 | getattr(self, 'conv3').train(mode) 84 | getattr(self, 'bn3').train(mode) 85 | for i in range(cfg.BACKBONE.VOV.FREEZE_AT, len(self.layers) + 1): 86 | if i == 0: 87 | continue 88 | getattr(self, 'layer%d' % i).train(mode) 89 | 90 | def forward(self, x): 91 | x = self.conv1(x) 92 | x = self.bn1(x) 93 | x = self.relu(x) 94 | x = self.conv2(x) 95 | x = self.bn2(x) 96 | x = self.relu(x) 97 | x = self.conv3(x) 98 | x = self.bn3(x) 99 | x = self.relu(x) 100 | 101 | x2 = self.layer1(x) 102 | x3 = self.layer2(x2) 103 | x4 = self.layer3(x3) 104 | x5 = self.layer4(x4) 105 | 106 | return [x2, x3, x4, x5] 107 | 108 | # ---------------------------------------------------------------------------- # 109 | # VoVNet Conv Body 110 | # ---------------------------------------------------------------------------- # 111 | @registry.BACKBONES.register("vovnet") 112 | def vovnet(): 113 | model = VoVNet(norm=get_norm()) 114 | return model 115 | -------------------------------------------------------------------------------- /rcnn/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .HRNet import * 2 | from .MobileNet_v1 import * 3 | from .MobileNet_v2 import * 4 | from .MobileNet_v3 import * 5 | from .ResNet import * 6 | from .ResNeXt import * 7 | from .VoVNet import * 8 | -------------------------------------------------------------------------------- /rcnn/modeling/cascade_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/cascade_rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/modeling/cascade_rcnn/cascade_rcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from rcnn.modeling.cascade_rcnn import heads 5 | from rcnn.modeling.cascade_rcnn import outputs 6 | from rcnn.modeling.cascade_rcnn.inference import box_post_processor 7 | from rcnn.modeling.cascade_rcnn.loss import box_loss_evaluator 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | class CascadeRCNN(torch.nn.Module): 13 | """ 14 | Generic Box Head class. 
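    Builds cfg.CASCADE_RCNN.NUM_STAGE (Box_Head_i, Output_i) pairs and runs them sequentially:
    each training stage subsamples and refines the proposals produced by the previous stage and
    contributes a stage-weighted classification/box-regression loss; at test time the per-stage
    scores are averaged when TEST_ENSEMBLE is enabled.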
15 | """ 16 | def __init__(self, dim_in, spatial_scale): 17 | super(CascadeRCNN, self).__init__() 18 | self.num_stage = cfg.CASCADE_RCNN.NUM_STAGE 19 | self.test_stage = cfg.CASCADE_RCNN.TEST_STAGE 20 | self.stage_loss_weights = cfg.CASCADE_RCNN.STAGE_WEIGHTS 21 | self.test_ensemble = cfg.CASCADE_RCNN.TEST_ENSEMBLE 22 | 23 | head = registry.ROI_CASCADE_HEADS[cfg.CASCADE_RCNN.ROI_BOX_HEAD] 24 | output = registry.ROI_CASCADE_OUTPUTS[cfg.CASCADE_RCNN.ROI_BOX_OUTPUT] 25 | 26 | for stage in range(1, self.num_stage + 1): 27 | stage_name = '_{}'.format(stage) 28 | setattr(self, 'Box_Head' + stage_name, head(dim_in, spatial_scale)) 29 | setattr(self, 'Output' + stage_name, output(getattr(self, 'Box_Head' + stage_name).dim_out)) 30 | 31 | def forward(self, features, proposals, targets=None): 32 | if self.training: 33 | return self._forward_train(features, proposals, targets) 34 | else: 35 | return self._forward_test(features, proposals) 36 | 37 | def _forward_train(self, features, proposals, targets=None): 38 | all_loss = dict() 39 | for i in range(self.num_stage): 40 | head = getattr(self, 'Box_Head_{}'.format(i + 1)) 41 | output = getattr(self, 'Output_{}'.format(i + 1)) 42 | loss_evaluator = box_loss_evaluator(i) 43 | 44 | # Cascade R-CNN subsamples during training the proposals with a fixed 45 | # positive / negative ratio 46 | with torch.no_grad(): 47 | proposals = loss_evaluator.subsample(proposals, targets) 48 | 49 | # extract features that will be fed to the final classifier. The 50 | # feature_extractor generally corresponds to the pooler + heads 51 | x = head(features, proposals) 52 | # final classifier that converts the features into predictions 53 | class_logits, box_regression = output(x) 54 | 55 | loss_classifier, loss_box_reg = loss_evaluator([class_logits], [box_regression]) 56 | loss_scalar = self.stage_loss_weights[i] 57 | all_loss['s{}_cls_loss'.format(i + 1)] = loss_classifier * loss_scalar 58 | all_loss['s{}_bbox_loss'.format(i + 1)] = loss_box_reg * loss_scalar 59 | 60 | with torch.no_grad(): 61 | if i < self.num_stage - 1: 62 | post_processor_train = box_post_processor(i, is_train=True) 63 | proposals = post_processor_train((class_logits, box_regression), proposals, targets) 64 | 65 | return x, proposals, all_loss 66 | 67 | def _forward_test(self, features, proposals): 68 | ms_scores = [] 69 | for i in range(self.num_stage): 70 | head = getattr(self, 'Box_Head_{}'.format(i + 1)) 71 | output = getattr(self, 'Output_{}'.format(i + 1)) 72 | post_processor_test = box_post_processor(i, is_train=False) 73 | # extract features that will be fed to the final classifier. 
The 74 | # feature_extractor generally corresponds to the pooler + heads 75 | x = head(features, proposals) 76 | # final classifier that converts the features into predictions 77 | class_logits, box_regression = output(x) 78 | ms_scores.append(class_logits) 79 | 80 | if i < self.test_stage - 1: 81 | proposals = post_processor_test((class_logits, box_regression), proposals) 82 | else: 83 | if self.test_ensemble: 84 | assert len(ms_scores) == self.test_stage 85 | class_logits = sum(ms_scores) / self.test_stage 86 | result = post_processor_test((class_logits, box_regression), proposals) 87 | return x, result, {} 88 | -------------------------------------------------------------------------------- /rcnn/modeling/cascade_rcnn/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .convfc_heads import * 2 | from .mlp_heads import * 3 | 4 | -------------------------------------------------------------------------------- /rcnn/modeling/cascade_rcnn/heads/convfc_heads.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from models.imagenet.utils import convert_conv2convws_model 5 | from utils.net import make_conv, make_fc 6 | from rcnn.utils.poolers import Pooler 7 | from rcnn.modeling import registry 8 | from rcnn.core.config import cfg 9 | 10 | 11 | @registry.ROI_CASCADE_HEADS.register("roi_xconv1fc_head") 12 | class roi_xconv1fc_head(nn.Module): 13 | """Add a X conv + 1fc head""" 14 | 15 | def __init__(self, dim_in, spatial_scale): 16 | super().__init__() 17 | self.dim_in = dim_in[-1] 18 | 19 | method = cfg.FAST_RCNN.ROI_XFORM_METHOD 20 | resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION 21 | sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 22 | pooler = Pooler( 23 | method=method, 24 | output_size=resolution, 25 | scales=spatial_scale, 26 | sampling_ratio=sampling_ratio, 27 | ) 28 | self.pooler = pooler 29 | 30 | use_lite = cfg.FAST_RCNN.CONVFC_HEAD.USE_LITE 31 | use_bn = cfg.FAST_RCNN.CONVFC_HEAD.USE_BN 32 | use_gn = cfg.FAST_RCNN.CONVFC_HEAD.USE_GN 33 | conv_dim = cfg.FAST_RCNN.CONVFC_HEAD.CONV_DIM 34 | num_stacked_convs = cfg.FAST_RCNN.CONVFC_HEAD.NUM_STACKED_CONVS 35 | dilation = cfg.FAST_RCNN.CONVFC_HEAD.DILATION 36 | 37 | xconvs = [] 38 | for ix in range(num_stacked_convs): 39 | xconvs.append( 40 | make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite, 41 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite, use_relu=True) 42 | ) 43 | self.dim_in = conv_dim 44 | self.add_module("xconvs", nn.Sequential(*xconvs)) 45 | 46 | input_size = self.dim_in * resolution[0] * resolution[1] 47 | mlp_dim = cfg.FAST_RCNN.CONVFC_HEAD.MLP_DIM 48 | self.fc6 = make_fc(input_size, mlp_dim, use_bn=False, use_gn=False) 49 | self.dim_out = mlp_dim 50 | 51 | if cfg.FAST_RCNN.CONVFC_HEAD.USE_WS: 52 | self = convert_conv2convws_model(self) 53 | 54 | def forward(self, x, proposals): 55 | x = self.pooler(x, proposals) 56 | x = self.xconvs(x) 57 | x = x.view(x.size(0), -1) 58 | x = F.relu(self.fc6(x), inplace=True) 59 | 60 | return x 61 | -------------------------------------------------------------------------------- /rcnn/modeling/cascade_rcnn/heads/mlp_heads.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from models.imagenet.utils import convert_conv2convws_model 6 | from utils.net import make_fc 
7 | from rcnn.utils.poolers import Pooler 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | @registry.ROI_CASCADE_HEADS.register("roi_2mlp_head") 13 | class roi_2mlp_head(nn.Module): 14 | """Add a ReLU MLP with two hidden layers.""" 15 | 16 | def __init__(self, dim_in, spatial_scale): 17 | super().__init__() 18 | self.dim_in = dim_in[-1] 19 | 20 | method = cfg.FAST_RCNN.ROI_XFORM_METHOD 21 | resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION 22 | sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 23 | pooler = Pooler( 24 | method=method, 25 | output_size=resolution, 26 | scales=spatial_scale, 27 | sampling_ratio=sampling_ratio, 28 | ) 29 | input_size = self.dim_in * resolution[0] * resolution[1] 30 | mlp_dim = cfg.FAST_RCNN.MLP_HEAD.MLP_DIM 31 | use_bn = cfg.FAST_RCNN.MLP_HEAD.USE_BN 32 | use_gn = cfg.FAST_RCNN.MLP_HEAD.USE_GN 33 | self.pooler = pooler 34 | self.fc6 = make_fc(input_size, mlp_dim, use_bn, use_gn) 35 | self.fc7 = make_fc(mlp_dim, mlp_dim, use_bn, use_gn) 36 | self.dim_out = mlp_dim 37 | 38 | if cfg.FAST_RCNN.MLP_HEAD.USE_WS: 39 | self = convert_conv2convws_model(self) 40 | 41 | def forward(self, x, proposals): 42 | x = self.pooler(x, proposals) 43 | x = x.view(x.size(0), -1) 44 | 45 | x = F.relu(self.fc6(x), inplace=True) 46 | x = F.relu(self.fc7(x), inplace=True) 47 | 48 | return x 49 | -------------------------------------------------------------------------------- /rcnn/modeling/cascade_rcnn/outputs.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.init as init 3 | 4 | from rcnn.modeling import registry 5 | from rcnn.core.config import cfg 6 | 7 | 8 | # ---------------------------------------------------------------------------- # 9 | # R-CNN bbox branch outputs 10 | # ---------------------------------------------------------------------------- # 11 | @registry.ROI_CASCADE_OUTPUTS.register("box_output") 12 | class Box_output(nn.Module): 13 | def __init__(self, dim_in): 14 | super().__init__() 15 | self.dim_in = dim_in 16 | 17 | self.cls_score = nn.Linear(self.dim_in, cfg.MODEL.NUM_CLASSES) 18 | # self.avgpool = nn.AdaptiveAvgPool2d(1) 19 | if cfg.FAST_RCNN.CLS_AGNOSTIC_BBOX_REG: # bg and fg 20 | self.bbox_pred = nn.Linear(self.dim_in, 4 * 2) 21 | else: 22 | raise NotImplementedError 23 | # self.bbox_pred = nn.Linear(self.dim_in, 4 * cfg.MODEL.NUM_CLASSES) 24 | 25 | self._init_weights() 26 | 27 | def _init_weights(self): 28 | init.normal_(self.cls_score.weight, std=0.01) 29 | init.constant_(self.cls_score.bias, 0) 30 | init.normal_(self.bbox_pred.weight, std=0.001) 31 | init.constant_(self.bbox_pred.bias, 0) 32 | 33 | def forward(self, x): 34 | if x.ndimension() == 4: 35 | x = nn.functional.adaptive_avg_pool2d(x, 1) 36 | # x = self.avgpool(x) 37 | x = x.view(x.size(0), -1) 38 | cls_score = self.cls_score(x) 39 | bbox_pred = self.bbox_pred(x) 40 | 41 | return cls_score, bbox_pred 42 | -------------------------------------------------------------------------------- /rcnn/modeling/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/fast_rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/modeling/fast_rcnn/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 
| 4 | from rcnn.modeling.fast_rcnn import heads 5 | from rcnn.modeling.fast_rcnn import outputs 6 | from rcnn.modeling.fast_rcnn.inference import box_post_processor 7 | from rcnn.modeling.fast_rcnn.loss import box_loss_evaluator 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | class FastRCNN(torch.nn.Module): 13 | """ 14 | Generic Box Head class. 15 | """ 16 | 17 | def __init__(self, dim_in, spatial_scale): 18 | super(FastRCNN, self).__init__() 19 | head = registry.ROI_BOX_HEADS[cfg.FAST_RCNN.ROI_BOX_HEAD] 20 | self.Head = head(dim_in, spatial_scale) 21 | output = registry.ROI_BOX_OUTPUTS[cfg.FAST_RCNN.ROI_BOX_OUTPUT] 22 | self.Output = output(self.Head.dim_out) 23 | 24 | self.post_processor = box_post_processor() 25 | self.loss_evaluator = box_loss_evaluator() 26 | 27 | def forward(self, features, proposals, targets=None): 28 | """ 29 | Arguments: 30 | features (list[Tensor]): feature-maps from possibly several levels 31 | proposals (list[BoxList]): proposal boxes 32 | targets (list[BoxList], optional): the ground-truth targets. 33 | 34 | Returns: 35 | x (Tensor): the result of the feature extractor 36 | proposals (list[BoxList]): during training, the subsampled proposals 37 | are returned. During testing, the predicted boxlists are returned 38 | losses (dict[Tensor]): During training, returns the losses for the 39 | head. During testing, returns an empty dict. 40 | """ 41 | if self.training: 42 | return self._forward_train(features, proposals, targets) 43 | else: 44 | return self._forward_test(features, proposals) 45 | 46 | def _forward_train(self, features, proposals, targets=None): 47 | # Faster R-CNN subsamples during training the proposals with a fixed 48 | # positive / negative ratio 49 | with torch.no_grad(): 50 | proposals = self.loss_evaluator.subsample(proposals, targets) 51 | 52 | # extract features that will be fed to the final classifier. The 53 | # feature_extractor generally corresponds to the pooler + heads 54 | x = self.Head(features, proposals) 55 | # final classifier that converts the features into predictions 56 | class_logits, box_regression = self.Output(x) 57 | 58 | losses = self.loss_evaluator([class_logits], [box_regression]) 59 | return x, proposals, losses 60 | 61 | def _forward_test(self, features, proposals): 62 | # extract features that will be fed to the final classifier. 
The 63 | # feature_extractor generally corresponds to the pooler + heads 64 | x = self.Head(features, proposals) 65 | # final classifier that converts the features into predictions 66 | class_logits, box_regression = self.Output(x) 67 | 68 | result = self.post_processor((class_logits, box_regression), proposals) 69 | return x, result, {} 70 | -------------------------------------------------------------------------------- /rcnn/modeling/fast_rcnn/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .mlp_heads import * 2 | from .convfc_heads import * 3 | -------------------------------------------------------------------------------- /rcnn/modeling/fast_rcnn/heads/convfc_heads.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from models.imagenet.utils import convert_conv2convws_model 5 | from utils.net import make_conv, make_fc 6 | from rcnn.utils.poolers import Pooler 7 | from rcnn.modeling import registry 8 | from rcnn.core.config import cfg 9 | 10 | 11 | @registry.ROI_BOX_HEADS.register("roi_xconv1fc_head") 12 | class roi_xconv1fc_head(nn.Module): 13 | """Add a X conv + 1fc head""" 14 | 15 | def __init__(self, dim_in, spatial_scale): 16 | super().__init__() 17 | self.dim_in = dim_in[-1] 18 | 19 | method = cfg.FAST_RCNN.ROI_XFORM_METHOD 20 | resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION 21 | sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 22 | pooler = Pooler( 23 | method=method, 24 | output_size=resolution, 25 | scales=spatial_scale, 26 | sampling_ratio=sampling_ratio, 27 | ) 28 | self.pooler = pooler 29 | 30 | use_lite = cfg.FAST_RCNN.CONVFC_HEAD.USE_LITE 31 | use_bn = cfg.FAST_RCNN.CONVFC_HEAD.USE_BN 32 | use_gn = cfg.FAST_RCNN.CONVFC_HEAD.USE_GN 33 | conv_dim = cfg.FAST_RCNN.CONVFC_HEAD.CONV_DIM 34 | num_stacked_convs = cfg.FAST_RCNN.CONVFC_HEAD.NUM_STACKED_CONVS 35 | dilation = cfg.FAST_RCNN.CONVFC_HEAD.DILATION 36 | 37 | xconvs = [] 38 | for ix in range(num_stacked_convs): 39 | xconvs.append( 40 | make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite, 41 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite, use_relu=True) 42 | ) 43 | self.dim_in = conv_dim 44 | self.add_module("xconvs", nn.Sequential(*xconvs)) 45 | 46 | input_size = self.dim_in * resolution[0] * resolution[1] 47 | mlp_dim = cfg.FAST_RCNN.CONVFC_HEAD.MLP_DIM 48 | self.fc6 = make_fc(input_size, mlp_dim, use_bn=False, use_gn=False) 49 | self.dim_out = mlp_dim 50 | 51 | if cfg.FAST_RCNN.CONVFC_HEAD.USE_WS: 52 | self = convert_conv2convws_model(self) 53 | 54 | def forward(self, x, proposals): 55 | x = self.pooler(x, proposals) 56 | x = self.xconvs(x) 57 | x = x.view(x.size(0), -1) 58 | x = F.relu(self.fc6(x), inplace=True) 59 | 60 | return x 61 | -------------------------------------------------------------------------------- /rcnn/modeling/fast_rcnn/heads/mlp_heads.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from models.imagenet.utils import convert_conv2convws_model 6 | from utils.net import make_fc 7 | from rcnn.utils.poolers import Pooler 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | @registry.ROI_BOX_HEADS.register("roi_2mlp_head") 13 | class roi_2mlp_head(nn.Module): 14 | """Add a ReLU MLP with two hidden layers.""" 15 | 16 | def 
__init__(self, dim_in, spatial_scale): 17 | super().__init__() 18 | self.dim_in = dim_in[-1] 19 | 20 | method = cfg.FAST_RCNN.ROI_XFORM_METHOD 21 | resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION 22 | sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 23 | pooler = Pooler( 24 | method=method, 25 | output_size=resolution, 26 | scales=spatial_scale, 27 | sampling_ratio=sampling_ratio, 28 | ) 29 | input_size = self.dim_in * resolution[0] * resolution[1] 30 | mlp_dim = cfg.FAST_RCNN.MLP_HEAD.MLP_DIM 31 | use_bn = cfg.FAST_RCNN.MLP_HEAD.USE_BN 32 | use_gn = cfg.FAST_RCNN.MLP_HEAD.USE_GN 33 | self.pooler = pooler 34 | self.fc6 = make_fc(input_size, mlp_dim, use_bn, use_gn) 35 | self.fc7 = make_fc(mlp_dim, mlp_dim, use_bn, use_gn) 36 | self.dim_out = mlp_dim 37 | 38 | if cfg.FAST_RCNN.MLP_HEAD.USE_WS: 39 | self = convert_conv2convws_model(self) 40 | 41 | def forward(self, x, proposals): 42 | x = self.pooler(x, proposals) 43 | x = x.view(x.size(0), -1) 44 | 45 | x = F.relu(self.fc6(x), inplace=True) 46 | x = F.relu(self.fc7(x), inplace=True) 47 | 48 | return x 49 | -------------------------------------------------------------------------------- /rcnn/modeling/fast_rcnn/outputs.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.init as init 3 | 4 | from rcnn.modeling import registry 5 | from rcnn.core.config import cfg 6 | 7 | 8 | # ---------------------------------------------------------------------------- # 9 | # R-CNN bbox branch outputs 10 | # ---------------------------------------------------------------------------- # 11 | @registry.ROI_BOX_OUTPUTS.register("box_output") 12 | class Box_output(nn.Module): 13 | def __init__(self, dim_in): 14 | super().__init__() 15 | self.dim_in = dim_in 16 | self.cls_on = cfg.FAST_RCNN.CLS_ON 17 | self.reg_on = cfg.FAST_RCNN.REG_ON 18 | 19 | if self.cls_on: 20 | self.cls_score = nn.Linear(self.dim_in, cfg.MODEL.NUM_CLASSES) 21 | init.normal_(self.cls_score.weight, std=0.01) 22 | init.constant_(self.cls_score.bias, 0) 23 | # self.avgpool = nn.AdaptiveAvgPool2d(1) 24 | if self.reg_on: 25 | if cfg.FAST_RCNN.CLS_AGNOSTIC_BBOX_REG: # bg and fg 26 | self.bbox_pred = nn.Linear(self.dim_in, 4 * 2) 27 | else: 28 | self.bbox_pred = nn.Linear(self.dim_in, 4 * cfg.MODEL.NUM_CLASSES) 29 | init.normal_(self.bbox_pred.weight, std=0.001) 30 | init.constant_(self.bbox_pred.bias, 0) 31 | 32 | def forward(self, x): 33 | if x.ndimension() == 4: 34 | x = nn.functional.adaptive_avg_pool2d(x, 1) 35 | # x = self.avgpool(x) 36 | x = x.view(x.size(0), -1) 37 | cls_score = self.cls_score(x) if self.cls_on else None 38 | bbox_pred = self.bbox_pred(x) if self.reg_on else None 39 | 40 | return cls_score, bbox_pred 41 | -------------------------------------------------------------------------------- /rcnn/modeling/fpn/HRFPN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from utils.net import make_conv 6 | from rcnn.core.config import cfg 7 | from rcnn.modeling import registry 8 | 9 | 10 | # ---------------------------------------------------------------------------- # 11 | # Functions for bolting HRFPN onto a backbone architectures 12 | # ---------------------------------------------------------------------------- # 13 | @registry.FPN_BODY.register("hrfpn") 14 | class hrfpn(nn.Module): 15 | # dim_in = [w, w * 2, w * 4, w * 8] 16 | # spatial_scale = [1/4, 1/8, 1/16, 1/32] 17 | def 
__init__(self, dim_in, spatial_scale): 18 | super().__init__() 19 | self.dim_in = sum(dim_in) 20 | self.spatial_scale = spatial_scale 21 | 22 | hrfpn_dim = cfg.FPN.HRFPN.DIM # 256 23 | use_lite = cfg.FPN.HRFPN.USE_LITE 24 | use_bn = cfg.FPN.HRFPN.USE_BN 25 | use_gn = cfg.FPN.HRFPN.USE_GN 26 | if cfg.FPN.HRFPN.POOLING_TYPE == 'AVG': 27 | self.pooling = F.avg_pool2d 28 | else: 29 | self.pooling = F.max_pool2d 30 | self.num_extra_pooling = cfg.FPN.HRFPN.NUM_EXTRA_POOLING # 1 31 | self.num_output = len(dim_in) + self.num_extra_pooling # 5 32 | 33 | self.reduction_conv = make_conv(self.dim_in, hrfpn_dim, kernel=1, use_bn=use_bn, use_gn=use_gn) 34 | self.dim_in = hrfpn_dim 35 | 36 | self.fpn_conv = nn.ModuleList() 37 | for i in range(self.num_output): 38 | self.fpn_conv.append( 39 | make_conv(self.dim_in, hrfpn_dim, kernel=3, use_dwconv=use_lite, use_bn=use_bn, use_gn=use_gn, 40 | suffix_1x1=use_lite) 41 | ) 42 | self.dim_in = hrfpn_dim 43 | 44 | if self.num_extra_pooling: 45 | self.spatial_scale.append(self.spatial_scale[-1] * 0.5) 46 | self.dim_out = [self.dim_in for _ in range(self.num_output)] 47 | self._init_weights() 48 | 49 | def _init_weights(self): 50 | # weight initialization 51 | for m in self.modules(): 52 | if isinstance(m, nn.Conv2d): 53 | nn.init.kaiming_uniform_(m.weight, a=1) 54 | if m.bias is not None: 55 | nn.init.zeros_(m.bias) 56 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 57 | nn.init.constant_(m.weight, 1) 58 | nn.init.constant_(m.bias, 0) 59 | 60 | def forward(self, x): 61 | outs = [x[0]] 62 | for i in range(1, len(x)): 63 | outs.append(F.interpolate(x[i], scale_factor=2**i, mode='bilinear')) 64 | out = torch.cat(outs, dim=1) 65 | out = self.reduction_conv(out) 66 | 67 | outs = [out] 68 | for i in range(1, self.num_output): 69 | outs.append(self.pooling(out, kernel_size=2**i, stride=2**i)) 70 | fpn_output_blobs = [] 71 | for i in range(self.num_output): 72 | fpn_output_blobs.append(self.fpn_conv[i](outs[i])) 73 | 74 | # use all levels 75 | return fpn_output_blobs # [P2 - P6] 76 | -------------------------------------------------------------------------------- /rcnn/modeling/fpn/__init__.py: -------------------------------------------------------------------------------- 1 | from .FPN import * 2 | from .HRFPN import * 3 | -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/keypoint_rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .convx_heads import * 2 | from .gce_heads import * 3 | -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/heads/convx_heads.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from utils.net import make_conv 5 | from rcnn.utils.poolers import Pooler 6 | from rcnn.modeling import registry 7 | from rcnn.core.config import cfg 8 | 9 | 10 | @registry.ROI_KEYPOINT_HEADS.register("roi_convx_head") 11 | class roi_convx_head(nn.Module): 12 | def __init__(self, dim_in, spatial_scale): 13 | super(roi_convx_head, self).__init__() 14 | 
self.dim_in = dim_in[-1] 15 | 16 | method = cfg.KRCNN.ROI_XFORM_METHOD 17 | resolution = cfg.KRCNN.ROI_XFORM_RESOLUTION 18 | sampling_ratio = cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO 19 | pooler = Pooler( 20 | method=method, 21 | output_size=resolution, 22 | scales=spatial_scale, 23 | sampling_ratio=sampling_ratio, 24 | ) 25 | self.pooler = pooler 26 | 27 | use_lite = cfg.KRCNN.CONVX_HEAD.USE_LITE 28 | use_bn = cfg.KRCNN.CONVX_HEAD.USE_BN 29 | use_gn = cfg.KRCNN.CONVX_HEAD.USE_GN 30 | conv_dim = cfg.KRCNN.CONVX_HEAD.CONV_DIM 31 | num_stacked_convs = cfg.KRCNN.CONVX_HEAD.NUM_STACKED_CONVS 32 | dilation = cfg.KRCNN.CONVX_HEAD.DILATION 33 | 34 | self.blocks = [] 35 | for layer_idx in range(num_stacked_convs): 36 | layer_name = "keypoint_fcn{}".format(layer_idx + 1) 37 | module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite, 38 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite) 39 | self.add_module(layer_name, module) 40 | self.dim_in = conv_dim 41 | self.blocks.append(layer_name) 42 | self.dim_out = self.dim_in 43 | 44 | def forward(self, x, proposals): 45 | x = self.pooler(x, proposals) 46 | 47 | for layer_name in self.blocks: 48 | x = F.relu(getattr(self, layer_name)(x)) 49 | 50 | return x 51 | -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/heads/gce_heads.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from models.ops import interpolate, NonLocal2d 5 | from rcnn.core.config import cfg 6 | from rcnn.modeling import registry 7 | from rcnn.utils.poolers import Pooler 8 | from utils.net import make_conv 9 | 10 | 11 | @registry.ROI_KEYPOINT_HEADS.register("roi_gce_head") 12 | class roi_gce_head(nn.Module): 13 | def __init__(self, dim_in, spatial_scale): 14 | super(roi_gce_head, self).__init__() 15 | self.dim_in = dim_in[-1] 16 | 17 | method = cfg.KRCNN.ROI_XFORM_METHOD 18 | resolution = cfg.KRCNN.ROI_XFORM_RESOLUTION 19 | sampling_ratio = cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO 20 | pooler = Pooler( 21 | method=method, 22 | output_size=resolution, 23 | scales=spatial_scale, 24 | sampling_ratio=sampling_ratio, 25 | ) 26 | self.pooler = pooler 27 | 28 | use_nl = cfg.KRCNN.GCE_HEAD.USE_NL 29 | use_bn = cfg.KRCNN.GCE_HEAD.USE_BN 30 | use_gn = cfg.KRCNN.GCE_HEAD.USE_GN 31 | conv_dim = cfg.KRCNN.GCE_HEAD.CONV_DIM 32 | asppv3_dim = cfg.KRCNN.GCE_HEAD.ASPPV3_DIM 33 | num_convs_before_asppv3 = cfg.KRCNN.GCE_HEAD.NUM_CONVS_BEFORE_ASPPV3 34 | asppv3_dilation = cfg.KRCNN.GCE_HEAD.ASPPV3_DILATION 35 | num_convs_after_asppv3 = cfg.KRCNN.GCE_HEAD.NUM_CONVS_AFTER_ASPPV3 36 | 37 | # convx before asppv3 module 38 | before_asppv3_list = [] 39 | for _ in range(num_convs_before_asppv3): 40 | before_asppv3_list.append( 41 | make_conv(self.dim_in, conv_dim, kernel=3, stride=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 42 | ) 43 | self.dim_in = conv_dim 44 | self.conv_before_asppv3 = nn.Sequential(*before_asppv3_list) if len(before_asppv3_list) else None 45 | 46 | # asppv3 module 47 | self.asppv3 = [] 48 | self.asppv3.append( 49 | make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 50 | ) 51 | for dilation in asppv3_dilation: 52 | self.asppv3.append( 53 | make_conv(self.dim_in, asppv3_dim, kernel=3, dilation=dilation, use_bn=use_bn, use_gn=use_gn, 54 | use_relu=True) 55 | ) 56 | self.asppv3 = nn.ModuleList(self.asppv3) 57 | self.im_pool = nn.Sequential( 58 | nn.AdaptiveAvgPool2d(1), 59 | 
make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 60 | ) 61 | self.dim_in = (len(asppv3_dilation) + 2) * asppv3_dim 62 | 63 | feat_list = [] 64 | feat_list.append( 65 | make_conv(self.dim_in, conv_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 66 | ) 67 | if use_nl: 68 | feat_list.append( 69 | NonLocal2d(conv_dim, int(conv_dim * cfg.KRCNN.GCE_HEAD.NL_RATIO), conv_dim, use_gn=True) 70 | ) 71 | self.feat = nn.Sequential(*feat_list) 72 | self.dim_in = conv_dim 73 | 74 | # convx after asppv3 module 75 | assert num_convs_after_asppv3 >= 1 76 | after_asppv3_list = [] 77 | for _ in range(num_convs_after_asppv3): 78 | after_asppv3_list.append( 79 | make_conv(self.dim_in, conv_dim, kernel=3, use_bn=use_bn, use_gn=use_gn, use_relu=True) 80 | ) 81 | self.dim_in = conv_dim 82 | self.conv_after_asppv3 = nn.Sequential(*after_asppv3_list) if len(after_asppv3_list) else None 83 | self.dim_out = self.dim_in 84 | 85 | def forward(self, x, proposals): 86 | resolution = cfg.KRCNN.ROI_XFORM_RESOLUTION 87 | x = self.pooler(x, proposals) 88 | 89 | if self.conv_before_asppv3 is not None: 90 | x = self.conv_before_asppv3(x) 91 | 92 | asppv3_out = [interpolate(self.im_pool(x), scale_factor=resolution, 93 | mode="bilinear", align_corners=False)] 94 | for i in range(len(self.asppv3)): 95 | asppv3_out.append(self.asppv3[i](x)) 96 | asppv3_out = torch.cat(asppv3_out, 1) 97 | asppv3_out = self.feat(asppv3_out) 98 | 99 | if self.conv_after_asppv3 is not None: 100 | x = self.conv_after_asppv3(asppv3_out) 101 | return x 102 | -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/inference.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | import torch 5 | from torch import nn 6 | 7 | from utils.data.structures.bounding_box import BoxList 8 | from utils.data.structures.keypoint import PersonKeypoints 9 | 10 | 11 | class KeypointPostProcessor(nn.Module): 12 | def __init__(self): 13 | super(KeypointPostProcessor, self).__init__() 14 | 15 | def forward(self, x, boxes): 16 | boxes_per_image = [len(box) for box in boxes] 17 | kpt_prob = x.split(boxes_per_image, dim=0) 18 | 19 | results = [] 20 | for prob, box in zip(kpt_prob, boxes): 21 | bbox = BoxList(box.bbox, box.size, mode="xyxy") 22 | for field in box.fields(): 23 | bbox.add_field(field, box.get_field(field)) 24 | bbox.add_field("keypoints", prob.cpu().numpy()) 25 | results.append(bbox) 26 | 27 | return results 28 | 29 | 30 | def keypoint_results(maps, rois): 31 | """Extract predicted keypoint locations from heatmaps. Output has shape 32 | (#rois, 4, #keypoints) with the 4 rows corresponding to (x, y, logit, prob) 33 | for each keypoint. 34 | """ 35 | # This function converts a discrete image coordinate in a HEATMAP_SIZE x 36 | # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain 37 | # consistency with keypoints_to_heatmap_labels by using the conversion from 38 | # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a 39 | # continuous coordinate. 
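# A minimal worked illustration of that c = d + 0.5 convention as it is applied below
# (values are illustrative only): if the per-keypoint argmax lands at discrete column
# x_int = 12 of the resized ROI map, the continuous image-space coordinate becomes
# x = (12 + 0.5) * width_correction + offset_x[i], i.e. the keypoint is placed at the
# center of that heatmap cell rather than at its corner.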
40 | rois = rois.bbox.numpy() 41 | 42 | offset_x = rois[:, 0] 43 | offset_y = rois[:, 1] 44 | 45 | widths = rois[:, 2] - rois[:, 0] 46 | heights = rois[:, 3] - rois[:, 1] 47 | widths = np.maximum(widths, 1) 48 | heights = np.maximum(heights, 1) 49 | widths_ceil = np.ceil(widths) 50 | heights_ceil = np.ceil(heights) 51 | 52 | # NCHW to NHWC for use with OpenCV 53 | maps = np.transpose(maps, [0, 2, 3, 1]) 54 | min_size = 0 # cfg.KRCNN.INFERENCE_MIN_SIZE 55 | num_keypoints = maps.shape[3] 56 | xy_preds = np.zeros((len(rois), 3, num_keypoints), dtype=np.float32) 57 | end_scores = np.zeros((len(rois), num_keypoints), dtype=np.float32) 58 | for i in range(len(rois)): 59 | if min_size > 0: 60 | roi_map_width = int(np.maximum(widths_ceil[i], min_size)) 61 | roi_map_height = int(np.maximum(heights_ceil[i], min_size)) 62 | else: 63 | roi_map_width = widths_ceil[i] 64 | roi_map_height = heights_ceil[i] 65 | width_correction = widths[i] / roi_map_width 66 | height_correction = heights[i] / roi_map_height 67 | roi_map = cv2.resize(maps[i], (roi_map_width, roi_map_height), interpolation=cv2.INTER_CUBIC) 68 | # Bring back to CHW 69 | roi_map = np.transpose(roi_map, [2, 0, 1]) 70 | # roi_map_probs = scores_to_probs(roi_map.copy()) 71 | w = roi_map.shape[2] 72 | pos = roi_map.reshape(num_keypoints, -1).argmax(axis=1) 73 | x_int = pos % w 74 | y_int = (pos - x_int) // w 75 | # assert (roi_map_probs[k, y_int, x_int] == roi_map_probs[k, :, :].max()) 76 | x = (x_int + 0.5) * width_correction 77 | y = (y_int + 0.5) * height_correction 78 | xy_preds[i, 0, :] = x + offset_x[i] 79 | xy_preds[i, 1, :] = y + offset_y[i] 80 | xy_preds[i, 2, :] = 1 81 | end_scores[i, :] = roi_map[np.arange(num_keypoints), y_int, x_int] 82 | 83 | xy = np.concatenate((xy_preds[:, :2, :], end_scores[:, np.newaxis, :]), axis=1) 84 | 85 | return np.transpose(xy_preds, [0, 2, 1]), xy 86 | 87 | 88 | def keypoint_post_processor(): 89 | keypoint_post_processor = KeypointPostProcessor() 90 | return keypoint_post_processor 91 | -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/keypoint_rcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from rcnn.modeling.keypoint_rcnn import heads 4 | from rcnn.modeling.keypoint_rcnn import outputs 5 | from rcnn.modeling.keypoint_rcnn.inference import keypoint_post_processor 6 | from rcnn.modeling.keypoint_rcnn.loss import keypoint_loss_evaluator 7 | from rcnn.modeling import registry 8 | from rcnn.core.config import cfg 9 | 10 | 11 | class KeypointRCNN(torch.nn.Module): 12 | def __init__(self, dim_in, spatial_scale): 13 | super(KeypointRCNN, self).__init__() 14 | if len(cfg.KRCNN.ROI_STRIDES) == 0: 15 | self.spatial_scale = spatial_scale 16 | else: 17 | self.spatial_scale = [1. 
/ stride for stride in cfg.KRCNN.ROI_STRIDES] 18 | 19 | head = registry.ROI_KEYPOINT_HEADS[cfg.KRCNN.ROI_KEYPOINT_HEAD] 20 | self.Head = head(dim_in, self.spatial_scale) 21 | output = registry.ROI_KEYPOINT_OUTPUTS[cfg.KRCNN.ROI_KEYPOINT_OUTPUT] 22 | self.Output = output(self.Head.dim_out) 23 | 24 | self.post_processor = keypoint_post_processor() 25 | self.loss_evaluator = keypoint_loss_evaluator() 26 | 27 | def forward(self, conv_features, proposals, targets=None): 28 | if self.training: 29 | return self._forward_train(conv_features, proposals, targets) 30 | else: 31 | return self._forward_test(conv_features, proposals) 32 | 33 | def _forward_train(self, conv_features, proposals, targets=None): 34 | all_proposals = proposals 35 | with torch.no_grad(): 36 | proposals = self.loss_evaluator.resample(proposals, targets) 37 | 38 | x = self.Head(conv_features, proposals) 39 | kp_logits = self.Output(x) 40 | 41 | loss_kp = self.loss_evaluator(kp_logits) 42 | 43 | return x, all_proposals, dict(loss_kp=loss_kp) 44 | 45 | def _forward_test(self, conv_features, proposals): 46 | x = self.Head(conv_features, proposals) 47 | kp_logits = self.Output(x) 48 | 49 | result = self.post_processor(kp_logits, proposals) 50 | return x, result, {} 51 | -------------------------------------------------------------------------------- /rcnn/modeling/keypoint_rcnn/outputs.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from rcnn.modeling import registry 5 | from rcnn.core.config import cfg 6 | 7 | 8 | @registry.ROI_KEYPOINT_OUTPUTS.register("keypoint_output") 9 | class Keypoint_output(nn.Module): 10 | def __init__(self, dim_in): 11 | super(Keypoint_output, self).__init__() 12 | num_keypoints = cfg.KRCNN.NUM_CLASSES 13 | assert cfg.KRCNN.RESOLUTION[0] // cfg.KRCNN.ROI_XFORM_RESOLUTION[0] == \ 14 | cfg.KRCNN.RESOLUTION[1] // cfg.KRCNN.ROI_XFORM_RESOLUTION[1] 15 | self.up_scale = cfg.KRCNN.RESOLUTION[0] // (cfg.KRCNN.ROI_XFORM_RESOLUTION[0] * 2) 16 | 17 | deconv_kernel = 4 18 | self.kps_score_lowres = nn.ConvTranspose2d( 19 | dim_in, 20 | num_keypoints, 21 | deconv_kernel, 22 | stride=2, 23 | padding=deconv_kernel // 2 - 1, 24 | ) 25 | 26 | nn.init.kaiming_normal_(self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu") 27 | nn.init.constant_(self.kps_score_lowres.bias, 0) 28 | 29 | self.dim_out = num_keypoints 30 | 31 | def forward(self, x): 32 | x = self.kps_score_lowres(x) 33 | if self.up_scale > 1: 34 | x = F.interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False) 35 | 36 | return x 37 | -------------------------------------------------------------------------------- /rcnn/modeling/mask_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/mask_rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/modeling/mask_rcnn/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .convx_heads import * 2 | -------------------------------------------------------------------------------- /rcnn/modeling/mask_rcnn/heads/convx_heads.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from models.imagenet.utils import 
convert_conv2convws_model 5 | from utils.net import make_conv 6 | from rcnn.utils.poolers import Pooler 7 | from rcnn.modeling import registry 8 | from rcnn.core.config import cfg 9 | 10 | 11 | @registry.ROI_MASK_HEADS.register("roi_convx_head") 12 | class roi_convx_head(nn.Module): 13 | """ 14 | Heads for FPN for classification 15 | """ 16 | 17 | def __init__(self, dim_in, spatial_scale): 18 | """ 19 | Arguments: 20 | num_classes (int): number of output classes 21 | input_size (int): number of channels of the input once it's flattened 22 | representation_size (int): size of the intermediate representation 23 | """ 24 | super(roi_convx_head, self).__init__() 25 | self.dim_in = dim_in[-1] 26 | 27 | method = cfg.MRCNN.ROI_XFORM_METHOD 28 | resolution = cfg.MRCNN.ROI_XFORM_RESOLUTION 29 | sampling_ratio = cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO 30 | pooler = Pooler( 31 | method=method, 32 | output_size=resolution, 33 | scales=spatial_scale, 34 | sampling_ratio=sampling_ratio, 35 | ) 36 | self.pooler = pooler 37 | 38 | use_lite = cfg.MRCNN.CONVX_HEAD.USE_LITE 39 | use_bn = cfg.MRCNN.CONVX_HEAD.USE_BN 40 | use_gn = cfg.MRCNN.CONVX_HEAD.USE_GN 41 | conv_dim = cfg.MRCNN.CONVX_HEAD.CONV_DIM 42 | num_stacked_convs = cfg.MRCNN.CONVX_HEAD.NUM_STACKED_CONVS 43 | dilation = cfg.MRCNN.CONVX_HEAD.DILATION 44 | 45 | self.blocks = [] 46 | for layer_idx in range(num_stacked_convs): 47 | layer_name = "mask_fcn{}".format(layer_idx + 1) 48 | module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite, 49 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite) 50 | self.add_module(layer_name, module) 51 | self.dim_in = conv_dim 52 | self.blocks.append(layer_name) 53 | self.dim_out = self.dim_in 54 | 55 | if cfg.MRCNN.CONVX_HEAD.USE_WS: 56 | self = convert_conv2convws_model(self) 57 | 58 | for m in self.modules(): 59 | if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)): 60 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity="relu") 61 | if m.bias is not None: 62 | nn.init.zeros_(m.bias) 63 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 64 | nn.init.constant_(m.weight, 1) 65 | nn.init.constant_(m.bias, 0) 66 | 67 | def forward(self, x, proposals): 68 | x = self.pooler(x, proposals) 69 | roi_feature = x 70 | for layer_name in self.blocks: 71 | x = F.relu(getattr(self, layer_name)(x)) 72 | 73 | return x, roi_feature 74 | 75 | -------------------------------------------------------------------------------- /rcnn/modeling/mask_rcnn/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from utils.data.structures.boxlist_ops import cat_boxlist 4 | from rcnn.modeling.mask_rcnn import heads 5 | from rcnn.modeling.mask_rcnn import outputs 6 | from rcnn.modeling.mask_rcnn.inference import mask_post_processor 7 | from rcnn.modeling.mask_rcnn.loss import mask_loss_evaluator 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | class MaskRCNN(torch.nn.Module): 13 | def __init__(self, dim_in, spatial_scale): 14 | super(MaskRCNN, self).__init__() 15 | if len(cfg.MRCNN.ROI_STRIDES) == 0: 16 | self.spatial_scale = spatial_scale 17 | else: 18 | self.spatial_scale = [1. 
/ stride for stride in cfg.MRCNN.ROI_STRIDES] 19 | 20 | head = registry.ROI_MASK_HEADS[cfg.MRCNN.ROI_MASK_HEAD] 21 | self.Head = head(dim_in, self.spatial_scale) 22 | output = registry.ROI_MASK_OUTPUTS[cfg.MRCNN.ROI_MASK_OUTPUT] 23 | self.Output = output(self.Head.dim_out) 24 | 25 | self.post_processor = mask_post_processor() 26 | self.loss_evaluator = mask_loss_evaluator() 27 | 28 | def forward(self, conv_features, proposals, targets=None): 29 | """ 30 | Arguments: 31 | conv_features (list[Tensor]): feature-maps from possibly several levels 32 | proposals (list[BoxList]): proposal boxes 33 | targets (list[BoxList], optional): the ground-truth targets. 34 | 35 | Returns: 36 | x (Tensor): the result of the feature extractor 37 | all_proposals (list[BoxList]): during training, the original proposals 38 | are returned. During testing, the predicted boxlists are returned 39 | with the `mask` field set 40 | losses (dict[Tensor]): During training, returns the losses for the 41 | head. During testing, returns an empty dict. 42 | """ 43 | if self.training: 44 | return self._forward_train(conv_features, proposals, targets) 45 | else: 46 | return self._forward_test(conv_features, proposals) 47 | 48 | def _forward_train(self, conv_features, proposals, targets=None): 49 | # during training, only focus on positive boxes 50 | all_proposals = proposals 51 | 52 | with torch.no_grad(): 53 | proposals = self.loss_evaluator.resample(proposals, targets) 54 | 55 | x, roi_feature = self.Head(conv_features, proposals) 56 | mask_logits = self.Output(x) 57 | 58 | loss_mask = self.loss_evaluator(mask_logits) 59 | return x, all_proposals, dict(loss_mask=loss_mask) 60 | 61 | def _forward_test(self, conv_features, proposals): 62 | x, roi_feature = self.Head(conv_features, proposals) 63 | mask_logits = self.Output(x) 64 | 65 | result = self.post_processor(mask_logits, proposals) 66 | return x, result, {} 67 | -------------------------------------------------------------------------------- /rcnn/modeling/mask_rcnn/outputs.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from rcnn.modeling import registry 5 | from rcnn.core.config import cfg 6 | 7 | 8 | @registry.ROI_MASK_OUTPUTS.register("mask_deconv_output") 9 | class Mask_deconv_output(nn.Module): 10 | def __init__(self, dim_in): 11 | super(Mask_deconv_output, self).__init__() 12 | num_classes = cfg.MODEL.NUM_CLASSES 13 | 14 | self.mask_deconv = nn.ConvTranspose2d(dim_in, dim_in, 2, 2, 0) 15 | self.mask_fcn_logits = nn.Conv2d(dim_in, num_classes, 1, 1, 0) 16 | 17 | # init 18 | nn.init.kaiming_normal_(self.mask_deconv.weight, mode='fan_out', nonlinearity="relu") 19 | if self.mask_deconv.bias is not None: 20 | nn.init.zeros_(self.mask_deconv.bias) 21 | nn.init.normal_(self.mask_fcn_logits.weight, std=0.001) 22 | if self.mask_fcn_logits.bias is not None: 23 | nn.init.constant_(self.mask_fcn_logits.bias, 0) 24 | 25 | def forward(self, x): 26 | x = F.relu(self.mask_deconv(x)) 27 | return self.mask_fcn_logits(x) 28 | 29 | 30 | @registry.ROI_MASK_OUTPUTS.register("mask_logits_output") 31 | class Mask_logits_output(nn.Module): 32 | def __init__(self, dim_in): 33 | super(Mask_logits_output, self).__init__() 34 | num_classes = cfg.MODEL.NUM_CLASSES 35 | 36 | self.mask_fcn_logits = nn.Conv2d(dim_in, num_classes, 1, 1, 0) 37 | 38 | # init 39 | nn.init.normal_(self.mask_fcn_logits.weight, std=0.001) 40 | if self.mask_fcn_logits.bias is not None: 41 | 
nn.init.constant_(self.mask_fcn_logits.bias, 0) 42 | 43 | def forward(self, x): 44 | return self.mask_fcn_logits(x) 45 | -------------------------------------------------------------------------------- /rcnn/modeling/parsing_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/modeling/parsing_rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/modeling/parsing_rcnn/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .convx_heads import * 2 | from .gce_heads import * 3 | -------------------------------------------------------------------------------- /rcnn/modeling/parsing_rcnn/heads/convx_heads.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from utils.net import make_conv 5 | from rcnn.utils.poolers import Pooler 6 | from rcnn.modeling import registry 7 | from rcnn.core.config import cfg 8 | 9 | 10 | @registry.ROI_PARSING_HEADS.register("roi_convx_head") 11 | class roi_convx_head(nn.Module): 12 | def __init__(self, dim_in, spatial_scale): 13 | super(roi_convx_head, self).__init__() 14 | self.dim_in = dim_in[-1] 15 | 16 | method = cfg.PRCNN.ROI_XFORM_METHOD 17 | resolution = cfg.PRCNN.ROI_XFORM_RESOLUTION 18 | sampling_ratio = cfg.PRCNN.ROI_XFORM_SAMPLING_RATIO 19 | pooler = Pooler( 20 | method=method, 21 | output_size=resolution, 22 | scales=spatial_scale, 23 | sampling_ratio=sampling_ratio, 24 | ) 25 | self.pooler = pooler 26 | 27 | use_lite = cfg.PRCNN.CONVX_HEAD.USE_LITE 28 | use_bn = cfg.PRCNN.CONVX_HEAD.USE_BN 29 | use_gn = cfg.PRCNN.CONVX_HEAD.USE_GN 30 | conv_dim = cfg.PRCNN.CONVX_HEAD.CONV_DIM 31 | num_stacked_convs = cfg.PRCNN.CONVX_HEAD.NUM_STACKED_CONVS 32 | dilation = cfg.PRCNN.CONVX_HEAD.DILATION 33 | 34 | self.blocks = [] 35 | for layer_idx in range(num_stacked_convs): 36 | layer_name = "parsing_fcn{}".format(layer_idx + 1) 37 | module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite, 38 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite) 39 | self.add_module(layer_name, module) 40 | self.dim_in = conv_dim 41 | self.blocks.append(layer_name) 42 | self.dim_out = self.dim_in 43 | 44 | def forward(self, x, proposals): 45 | x = self.pooler(x, proposals) 46 | roi_feature = x 47 | for layer_name in self.blocks: 48 | x = F.relu(getattr(self, layer_name)(x)) 49 | 50 | return x, roi_feature 51 | -------------------------------------------------------------------------------- /rcnn/modeling/parsing_rcnn/inference.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | from utils.data.structures.bounding_box import BoxList 8 | from models.ops.misc import interpolate 9 | from rcnn.core.config import cfg 10 | 11 | 12 | # TODO check if want to return a single BoxList or a composite 13 | # object 14 | class ParsingPostProcessor(nn.Module): 15 | """ 16 | From the results of the CNN, post process the masks 17 | by taking the mask corresponding to the class with max 18 | probability (which are of fixed size and directly output 19 | by the CNN) and return the masks in the mask field of the BoxList. 
20 | If a masker object is passed, it will additionally 21 | project the masks in the image according to the locations in boxes, 22 | """ 23 | 24 | def __init__(self): 25 | super(ParsingPostProcessor, self).__init__() 26 | 27 | def forward(self, x, boxes): 28 | """ 29 | Arguments: 30 | x (Tensor): the mask logits 31 | boxes (list[BoxList]): bounding boxes that are used as 32 | reference, one for ech image 33 | Returns: 34 | results (list[BoxList]): one BoxList for each image, containing 35 | the extra field mask 36 | """ 37 | parsing_prob = x 38 | parsing_prob = F.softmax(parsing_prob, dim=1) 39 | 40 | boxes_per_image = [len(box) for box in boxes] 41 | parsing_prob = parsing_prob.split(boxes_per_image, dim=0) 42 | 43 | results = [] 44 | for prob, box in zip(parsing_prob, boxes): 45 | bbox = BoxList(box.bbox, box.size, mode="xyxy") 46 | 47 | for field in box.fields(): 48 | bbox.add_field(field, box.get_field(field)) 49 | bbox_scores = bbox.get_field("scores") 50 | bbox.add_field("parsing", prob.cpu().numpy()) 51 | bbox.add_field("parsing_scores", bbox_scores.cpu().numpy()) 52 | results.append(bbox) 53 | 54 | return results 55 | 56 | 57 | def expand_boxes(boxes, h, w): 58 | """Expand an array of boxes by a given scale.""" 59 | w_half = (boxes[:, 2] - boxes[:, 0]) * .5 60 | h_half = (boxes[:, 3] - boxes[:, 1]) * .5 61 | x_c = (boxes[:, 2] + boxes[:, 0]) * .5 62 | y_c = (boxes[:, 3] + boxes[:, 1]) * .5 63 | 64 | h_scale = (h + 2.0) / h 65 | w_scale = (w + 2.0) / w 66 | w_half *= w_scale 67 | h_half *= h_scale 68 | 69 | boxes_exp = np.zeros(boxes.shape) 70 | boxes_exp[:, 0] = x_c - w_half 71 | boxes_exp[:, 2] = x_c + w_half 72 | boxes_exp[:, 1] = y_c - h_half 73 | boxes_exp[:, 3] = y_c + h_half 74 | 75 | return boxes_exp 76 | 77 | 78 | def parsing_results(parsings, boxes, semseg=None): 79 | im_w, im_h = boxes.size 80 | parsings = parsings.transpose((0, 2, 3, 1)) 81 | boxes = boxes.bbox.numpy() 82 | H, W = parsings.shape[1:3] 83 | N = parsings.shape[3] 84 | 85 | boxes = expand_boxes(boxes, H, W) 86 | boxes = boxes.astype(np.int32) 87 | padded_parsing = np.zeros((H + 2, W + 2, N), dtype=np.float32) 88 | 89 | if semseg is not None: 90 | semseg = cv2.resize(semseg, (im_w, im_h), interpolation=cv2.INTER_LINEAR) 91 | else: 92 | semseg = np.zeros((im_h, im_w, N), dtype=np.float32) 93 | 94 | parsing_results = [] 95 | for i in range(boxes.shape[0]): 96 | padded_parsing[1:-1, 1:-1] = parsings[i] 97 | box = boxes[i, :] 98 | w = box[2] - box[0] + 1 99 | h = box[3] - box[1] + 1 100 | w = np.maximum(w, 1) 101 | h = np.maximum(h, 1) 102 | parsing = cv2.resize(padded_parsing, (w, h), interpolation=cv2.INTER_LINEAR) 103 | parsing_idx = np.argmax(parsing, axis=2) 104 | im_parsing = np.zeros((im_h, im_w), dtype=np.uint8) 105 | x_0 = max(box[0], 0) 106 | x_1 = min(box[2] + 1, im_w) 107 | y_0 = max(box[1], 0) 108 | y_1 = min(box[3] + 1, im_h) 109 | 110 | mask = np.where(parsing_idx >= 1, 1, 0) 111 | mask = mask[:, :, np.newaxis].repeat(N, axis=2) 112 | cropped_semseg = semseg[y_0:y_1, x_0:x_1] * mask[(y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])] 113 | 114 | parsing[(y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])] += \ 115 | cropped_semseg * cfg.PRCNN.SEMSEG_FUSE_WEIGHT 116 | parsing = np.argmax(parsing, axis=2) 117 | 118 | im_parsing[y_0:y_1, x_0:x_1] = parsing[(y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])] 119 | parsing_results.append(im_parsing) 120 | return parsing_results 121 | 122 | 123 | def parsing_post_processor(): 124 | parsing_post_processor = 
ParsingPostProcessor() 125 | return parsing_post_processor 126 | -------------------------------------------------------------------------------- /rcnn/modeling/parsing_rcnn/outputs.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from rcnn.modeling import registry 5 | from rcnn.core.config import cfg 6 | 7 | 8 | @registry.ROI_PARSING_OUTPUTS.register("parsing_output") 9 | class Parsing_output(nn.Module): 10 | def __init__(self, dim_in): 11 | super(Parsing_output, self).__init__() 12 | num_parsing = cfg.PRCNN.NUM_PARSING 13 | assert cfg.PRCNN.RESOLUTION[0] // cfg.PRCNN.ROI_XFORM_RESOLUTION[0] == \ 14 | cfg.PRCNN.RESOLUTION[1] // cfg.PRCNN.ROI_XFORM_RESOLUTION[1] 15 | self.up_scale = cfg.PRCNN.RESOLUTION[0] // (cfg.PRCNN.ROI_XFORM_RESOLUTION[0] * 2) 16 | 17 | deconv_kernel = 4 18 | self.parsing_score_lowres = nn.ConvTranspose2d( 19 | dim_in, 20 | num_parsing, 21 | deconv_kernel, 22 | stride=2, 23 | padding=deconv_kernel // 2 - 1, 24 | ) 25 | 26 | nn.init.kaiming_normal_(self.parsing_score_lowres.weight, mode="fan_out", nonlinearity="relu") 27 | nn.init.constant_(self.parsing_score_lowres.bias, 0) 28 | 29 | self.dim_out = num_parsing 30 | 31 | def forward(self, x): 32 | x = self.parsing_score_lowres(x) 33 | if self.up_scale > 1: 34 | x = F.interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False) 35 | 36 | return x 37 | -------------------------------------------------------------------------------- /rcnn/modeling/parsing_rcnn/parsing_rcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from rcnn.modeling.parsing_rcnn import heads 4 | from rcnn.modeling.parsing_rcnn import outputs 5 | from rcnn.modeling.parsing_rcnn.inference import parsing_post_processor 6 | from rcnn.modeling.parsing_rcnn.loss import parsing_loss_evaluator 7 | from rcnn.modeling import registry 8 | from rcnn.core.config import cfg 9 | 10 | 11 | class ParsingRCNN(torch.nn.Module): 12 | def __init__(self, dim_in, spatial_scale): 13 | super(ParsingRCNN, self).__init__() 14 | if len(cfg.PRCNN.ROI_STRIDES) == 0: 15 | self.spatial_scale = spatial_scale 16 | else: 17 | self.spatial_scale = [1. / stride for stride in cfg.PRCNN.ROI_STRIDES] 18 | 19 | head = registry.ROI_PARSING_HEADS[cfg.PRCNN.ROI_PARSING_HEAD] 20 | self.Head = head(dim_in, self.spatial_scale) 21 | output = registry.ROI_PARSING_OUTPUTS[cfg.PRCNN.ROI_PARSING_OUTPUT] 22 | self.Output = output(self.Head.dim_out) 23 | 24 | self.post_processor = parsing_post_processor() 25 | self.loss_evaluator = parsing_loss_evaluator() 26 | 27 | def forward(self, conv_features, proposals, targets=None): 28 | """ 29 | Arguments: 30 | conv_features (list[Tensor]): feature-maps from possibly several levels 31 | proposals (list[BoxList]): proposal boxes 32 | targets (list[BoxList], optional): the ground-truth targets. 33 | Returns: 34 | x (Tensor): the result of the feature extractor 35 | all_proposals (list[BoxList]): during training, the original proposals 36 | are returned. During testing, the predicted boxlists are returned 37 | with the `parsing` field set 38 | losses (dict[Tensor]): During training, returns the losses for the 39 | head. During testing, returns an empty dict. 
40 | """ 41 | if self.training: 42 | return self._forward_train(conv_features, proposals, targets) 43 | else: 44 | return self._forward_test(conv_features, proposals) 45 | 46 | def _forward_train(self, conv_features, proposals, targets=None): 47 | all_proposals = proposals 48 | with torch.no_grad(): 49 | proposals = self.loss_evaluator.resample(proposals, targets) 50 | 51 | x, roi_feature = self.Head(conv_features, proposals) 52 | parsing_logits = self.Output(x) 53 | 54 | loss_parsing = self.loss_evaluator(parsing_logits) 55 | return x, all_proposals, dict(loss_parsing=loss_parsing) 56 | 57 | def _forward_test(self, conv_features, proposals): 58 | x, roi_feature = self.Head(conv_features, proposals) 59 | parsing_logits = self.Output(x) 60 | 61 | result = self.post_processor(parsing_logits, proposals) 62 | return x, result, {} 63 | -------------------------------------------------------------------------------- /rcnn/modeling/registry.py: -------------------------------------------------------------------------------- 1 | from utils.registry import Registry 2 | 3 | 4 | """ 5 | Feature Extractor. 6 | """ 7 | # Backbone 8 | BACKBONES = Registry() 9 | 10 | # FPN 11 | FPN_BODY = Registry() 12 | 13 | 14 | """ 15 | ROI Head. 16 | """ 17 | # Box Head 18 | ROI_CLS_HEADS = Registry() 19 | ROI_CLS_OUTPUTS = Registry() 20 | ROI_BOX_HEADS = Registry() 21 | ROI_BOX_OUTPUTS = Registry() 22 | 23 | # Cascade Head 24 | ROI_CASCADE_HEADS = Registry() 25 | ROI_CASCADE_OUTPUTS = Registry() 26 | 27 | # Mask Head 28 | ROI_MASK_HEADS = Registry() 29 | ROI_MASK_OUTPUTS = Registry() 30 | 31 | # Keypoint Head 32 | ROI_KEYPOINT_HEADS = Registry() 33 | ROI_KEYPOINT_OUTPUTS = Registry() 34 | 35 | # Parsing Head 36 | ROI_PARSING_HEADS = Registry() 37 | ROI_PARSING_OUTPUTS = Registry() 38 | 39 | # UV Head 40 | ROI_UV_HEADS = Registry() 41 | ROI_UV_OUTPUTS = Registry() 42 | 43 | -------------------------------------------------------------------------------- /rcnn/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rcnn/modeling/uv_rcnn/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .convx_heads import * 2 | from .gce_heads import * 3 | -------------------------------------------------------------------------------- /rcnn/modeling/uv_rcnn/heads/convx_heads.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from utils.net import make_conv 5 | from rcnn.utils.poolers import Pooler 6 | from rcnn.modeling import registry 7 | from rcnn.core.config import cfg 8 | 9 | 10 | @registry.ROI_UV_HEADS.register("roi_convx_head") 11 | class roi_convx_head(nn.Module): 12 | def __init__(self, dim_in, spatial_scale): 13 | super(roi_convx_head, self).__init__() 14 | self.dim_in = dim_in[-1] 15 | 16 | method = cfg.UVRCNN.ROI_XFORM_METHOD 17 | resolution = cfg.UVRCNN.ROI_XFORM_RESOLUTION 18 | sampling_ratio = cfg.UVRCNN.ROI_XFORM_SAMPLING_RATIO 19 | pooler = Pooler( 20 | method=method, 21 | output_size=resolution, 22 | scales=spatial_scale, 23 | sampling_ratio=sampling_ratio, 24 | ) 25 | self.pooler = pooler 26 | 27 | use_lite = cfg.UVRCNN.CONVX_HEAD.USE_LITE 28 | use_bn = cfg.UVRCNN.CONVX_HEAD.USE_BN 29 | use_gn = cfg.UVRCNN.CONVX_HEAD.USE_GN 30 | conv_dim = cfg.UVRCNN.CONVX_HEAD.CONV_DIM 31 | num_stacked_convs = 
cfg.UVRCNN.CONVX_HEAD.NUM_STACKED_CONVS 32 | dilation = cfg.UVRCNN.CONVX_HEAD.DILATION 33 | 34 | self.blocks = [] 35 | for layer_idx in range(num_stacked_convs): 36 | layer_name = "UV_fcn{}".format(layer_idx + 1) 37 | module = make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite, 38 | use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite) 39 | self.add_module(layer_name, module) 40 | self.dim_in = conv_dim 41 | self.blocks.append(layer_name) 42 | self.dim_out = self.dim_in 43 | 44 | def forward(self, x, proposals): 45 | x = self.pooler(x, proposals) 46 | 47 | for layer_name in self.blocks: 48 | x = F.relu(getattr(self, layer_name)(x)) 49 | 50 | return x 51 | -------------------------------------------------------------------------------- /rcnn/modeling/uv_rcnn/heads/gce_heads.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from models.ops import interpolate, NonLocal2d 5 | from rcnn.core.config import cfg 6 | from rcnn.modeling import registry 7 | from rcnn.utils.poolers import Pooler 8 | from utils.net import make_conv 9 | 10 | 11 | @registry.ROI_UV_HEADS.register("roi_gce_head") 12 | class roi_gce_head(nn.Module): 13 | def __init__(self, dim_in, spatial_scale): 14 | super(roi_gce_head, self).__init__() 15 | self.dim_in = dim_in[-1] 16 | 17 | method = cfg.UVRCNN.ROI_XFORM_METHOD 18 | resolution = cfg.UVRCNN.ROI_XFORM_RESOLUTION 19 | sampling_ratio = cfg.UVRCNN.ROI_XFORM_SAMPLING_RATIO 20 | pooler = Pooler( 21 | method=method, 22 | output_size=resolution, 23 | scales=spatial_scale, 24 | sampling_ratio=sampling_ratio, 25 | ) 26 | self.pooler = pooler 27 | 28 | use_nl = cfg.UVRCNN.GCE_HEAD.USE_NL 29 | use_bn = cfg.UVRCNN.GCE_HEAD.USE_BN 30 | use_gn = cfg.UVRCNN.GCE_HEAD.USE_GN 31 | conv_dim = cfg.UVRCNN.GCE_HEAD.CONV_DIM 32 | asppv3_dim = cfg.UVRCNN.GCE_HEAD.ASPPV3_DIM 33 | num_convs_before_asppv3 = cfg.UVRCNN.GCE_HEAD.NUM_CONVS_BEFORE_ASPPV3 34 | asppv3_dilation = cfg.UVRCNN.GCE_HEAD.ASPPV3_DILATION 35 | num_convs_after_asppv3 = cfg.UVRCNN.GCE_HEAD.NUM_CONVS_AFTER_ASPPV3 36 | 37 | # convx before asppv3 module 38 | before_asppv3_list = [] 39 | for _ in range(num_convs_before_asppv3): 40 | before_asppv3_list.append( 41 | make_conv(self.dim_in, conv_dim, kernel=3, stride=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 42 | ) 43 | self.dim_in = conv_dim 44 | self.conv_before_asppv3 = nn.Sequential(*before_asppv3_list) if len(before_asppv3_list) else None 45 | 46 | # asppv3 module 47 | self.asppv3 = [] 48 | self.asppv3.append( 49 | make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 50 | ) 51 | for dilation in asppv3_dilation: 52 | self.asppv3.append( 53 | make_conv(self.dim_in, asppv3_dim, kernel=3, dilation=dilation, use_bn=use_bn, use_gn=use_gn, 54 | use_relu=True) 55 | ) 56 | self.asppv3 = nn.ModuleList(self.asppv3) 57 | self.im_pool = nn.Sequential( 58 | nn.AdaptiveAvgPool2d(1), 59 | make_conv(self.dim_in, asppv3_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 60 | ) 61 | self.dim_in = (len(asppv3_dilation) + 2) * asppv3_dim 62 | 63 | feat_list = [] 64 | feat_list.append( 65 | make_conv(self.dim_in, conv_dim, kernel=1, use_bn=use_bn, use_gn=use_gn, use_relu=True) 66 | ) 67 | if use_nl: 68 | feat_list.append( 69 | NonLocal2d(conv_dim, int(conv_dim * cfg.UVRCNN.GCE_HEAD.NL_RATIO), conv_dim, use_gn=True) 70 | ) 71 | self.feat = nn.Sequential(*feat_list) 72 | self.dim_in = conv_dim 73 | 74 | # convx after asppv3 module 75 | 
assert num_convs_after_asppv3 >= 1 76 | after_asppv3_list = [] 77 | for _ in range(num_convs_after_asppv3): 78 | after_asppv3_list.append( 79 | make_conv(self.dim_in, conv_dim, kernel=3, use_bn=use_bn, use_gn=use_gn, use_relu=True) 80 | ) 81 | self.dim_in = conv_dim 82 | self.conv_after_asppv3 = nn.Sequential(*after_asppv3_list) if len(after_asppv3_list) else None 83 | self.dim_out = self.dim_in 84 | 85 | def forward(self, x, proposals): 86 | resolution = cfg.UVRCNN.ROI_XFORM_RESOLUTION 87 | x = self.pooler(x, proposals) 88 | 89 | if self.conv_before_asppv3 is not None: 90 | x = self.conv_before_asppv3(x) 91 | 92 | asppv3_out = [interpolate(self.im_pool(x), scale_factor=resolution, 93 | mode="bilinear", align_corners=False)] 94 | for i in range(len(self.asppv3)): 95 | asppv3_out.append(self.asppv3[i](x)) 96 | asppv3_out = torch.cat(asppv3_out, 1) 97 | asppv3_out = self.feat(asppv3_out) 98 | 99 | if self.conv_after_asppv3 is not None: 100 | x = self.conv_after_asppv3(asppv3_out) 101 | return x 102 | -------------------------------------------------------------------------------- /rcnn/modeling/uv_rcnn/inference.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | import torch 5 | from torch import nn 6 | 7 | from utils.data.structures.bounding_box import BoxList 8 | from rcnn.core.config import cfg 9 | 10 | 11 | # TODO check if want to return a single BoxList or a composite 12 | # object 13 | class UVPostProcessor(nn.Module): 14 | """ 15 | From the results of the CNN, post process the masks 16 | by taking the mask corresponding to the class with max 17 | probability (which are of fixed size and directly output 18 | by the CNN) and return the masks in the mask field of the BoxList. 
19 | 20 | If a masker object is passed, it will additionally 21 | project the masks in the image according to the locations in boxes, 22 | """ 23 | 24 | def __init__(self): 25 | super(UVPostProcessor, self).__init__() 26 | 27 | def forward(self, uv_logits, boxes): 28 | """ 29 | Arguments: 30 | uv_logits (List): the uv logits 31 | boxes (list[BoxList]): bounding boxes that are used as 32 | reference, one for ech image 33 | 34 | Returns: 35 | results (list[BoxList]): one BoxList for each image, containing 36 | the extra field mask 37 | """ 38 | UV_pred_Ann, UV_pred_Index, UV_pred_U, UV_pred_V = uv_logits 39 | 40 | boxes_per_image = [len(box) for box in boxes] 41 | UV_pred_Ann = UV_pred_Ann.split(boxes_per_image, dim=0) 42 | UV_pred_Index = UV_pred_Index.split(boxes_per_image, dim=0) 43 | UV_pred_U = UV_pred_U.split(boxes_per_image, dim=0) 44 | UV_pred_V = UV_pred_V.split(boxes_per_image, dim=0) 45 | 46 | results = [] 47 | for Ann, Index, U, V, box in zip(UV_pred_Ann, UV_pred_Index, UV_pred_U, UV_pred_V, boxes): 48 | bbox = BoxList(box.bbox, box.size, mode="xyxy") 49 | for field in box.fields(): 50 | bbox.add_field(field, box.get_field(field)) 51 | bbox.add_field("uv", [Ann.cpu().numpy(), Index.cpu().numpy(), U.cpu().numpy(), V.cpu().numpy()]) 52 | results.append(bbox) 53 | 54 | return results 55 | 56 | 57 | def uv_results(uv_logits, boxes): 58 | AnnIndex, Index_UV, U_uv, V_uv = uv_logits 59 | K = cfg.UVRCNN.NUM_PATCHES + 1 60 | boxes = boxes.bbox.numpy() 61 | uvs_results = [] 62 | for ind, entry in enumerate(boxes): 63 | # Compute ref box width and height 64 | bx = max(entry[2] - entry[0], 1) 65 | by = max(entry[3] - entry[1], 1) 66 | 67 | # preds[ind] axes are CHW; bring p axes to WHC 68 | CurAnnIndex = np.swapaxes(AnnIndex[ind], 0, 2) 69 | CurIndex_UV = np.swapaxes(Index_UV[ind], 0, 2) 70 | CurU_uv = np.swapaxes(U_uv[ind], 0, 2) 71 | CurV_uv = np.swapaxes(V_uv[ind], 0, 2) 72 | 73 | # Resize p from (HEATMAP_SIZE, HEATMAP_SIZE, c) to (int(bx), int(by), c) 74 | CurAnnIndex = cv2.resize(CurAnnIndex, (by, bx)) 75 | CurIndex_UV = cv2.resize(CurIndex_UV, (by, bx)) 76 | CurU_uv = cv2.resize(CurU_uv, (by, bx)) 77 | CurV_uv = cv2.resize(CurV_uv, (by, bx)) 78 | 79 | # Bring Cur_Preds axes back to CHW 80 | CurAnnIndex = np.swapaxes(CurAnnIndex, 0, 2) 81 | CurIndex_UV = np.swapaxes(CurIndex_UV, 0, 2) 82 | CurU_uv = np.swapaxes(CurU_uv, 0, 2) 83 | CurV_uv = np.swapaxes(CurV_uv, 0, 2) 84 | 85 | # Removed squeeze calls due to singleton dimension issues 86 | CurAnnIndex = np.argmax(CurAnnIndex, axis=0) 87 | CurIndex_UV = np.argmax(CurIndex_UV, axis=0) 88 | CurIndex_UV = CurIndex_UV * (CurAnnIndex>0).astype(np.float32) 89 | 90 | output = np.zeros([3, int(by), int(bx)], dtype=np.float32) 91 | output[0] = CurIndex_UV 92 | 93 | for part_id in range(1, K): 94 | CurrentU = CurU_uv[part_id] 95 | CurrentV = CurV_uv[part_id] 96 | output[1, CurIndex_UV==part_id] = CurrentU[CurIndex_UV==part_id] 97 | output[2, CurIndex_UV==part_id] = CurrentV[CurIndex_UV==part_id] 98 | uvs_results.append(output) 99 | return uvs_results 100 | 101 | 102 | def uv_post_processor(): 103 | uv_post_processor = UVPostProcessor() 104 | return uv_post_processor 105 | -------------------------------------------------------------------------------- /rcnn/modeling/uv_rcnn/outputs.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from rcnn.modeling import registry 5 | from rcnn.core.config import cfg 6 | 7 | 8 | 
@registry.ROI_UV_OUTPUTS.register("uv_output") 9 | class UV_output(nn.Module): 10 | def __init__(self, dim_in): 11 | super(UV_output, self).__init__() 12 | num_patches = cfg.UVRCNN.NUM_PATCHES 13 | deconv_kernel = 4 14 | assert cfg.UVRCNN.RESOLUTION[0] // cfg.UVRCNN.ROI_XFORM_RESOLUTION[0] == \ 15 | cfg.UVRCNN.RESOLUTION[1] // cfg.UVRCNN.ROI_XFORM_RESOLUTION[1] 16 | self.up_scale = cfg.UVRCNN.RESOLUTION[0] // (cfg.UVRCNN.ROI_XFORM_RESOLUTION[0] * 2) 17 | 18 | self.deconv_Ann = nn.ConvTranspose2d(dim_in, 15, deconv_kernel, 2, padding=deconv_kernel // 2 - 1) 19 | self.deconv_Index = nn.ConvTranspose2d(dim_in, num_patches + 1, deconv_kernel, 2, 20 | padding=deconv_kernel // 2 - 1) 21 | self.deconv_U = nn.ConvTranspose2d(dim_in, num_patches + 1, deconv_kernel, 2, padding=deconv_kernel // 2 - 1) 22 | self.deconv_V = nn.ConvTranspose2d(dim_in, num_patches + 1, deconv_kernel, 2, padding=deconv_kernel // 2 - 1) 23 | 24 | for m in self.modules(): 25 | if isinstance(m, nn.ConvTranspose2d): 26 | nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") 27 | nn.init.constant_(m.bias, 0) 28 | 29 | def forward(self, x): 30 | x_Ann = self.deconv_Ann(x) 31 | x_Index = self.deconv_Index(x) 32 | x_U = self.deconv_U(x) 33 | x_V = self.deconv_V(x) 34 | 35 | if self.up_scale > 1: 36 | x_Ann = F.interpolate(x_Ann, scale_factor=self.up_scale, mode="bilinear", align_corners=False) 37 | x_Index = F.interpolate(x_Index, scale_factor=self.up_scale, mode="bilinear", align_corners=False) 38 | x_U = F.interpolate(x_U, scale_factor=self.up_scale, mode="bilinear", align_corners=False) 39 | x_V = F.interpolate(x_V, scale_factor=self.up_scale, mode="bilinear", align_corners=False) 40 | 41 | return [x_Ann, x_Index, x_U, x_V] 42 | -------------------------------------------------------------------------------- /rcnn/modeling/uv_rcnn/uv_rcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from rcnn.modeling.uv_rcnn import heads 5 | from rcnn.modeling.uv_rcnn import outputs 6 | from rcnn.modeling.uv_rcnn.inference import uv_post_processor 7 | from rcnn.modeling.uv_rcnn.loss import uv_loss_evaluator 8 | from rcnn.modeling import registry 9 | from rcnn.core.config import cfg 10 | 11 | 12 | class UVRCNN(torch.nn.Module): 13 | def __init__(self, dim_in, spatial_scale): 14 | super(UVRCNN, self).__init__() 15 | if len(cfg.UVRCNN.ROI_STRIDES) == 0: 16 | self.spatial_scale = spatial_scale 17 | else: 18 | self.spatial_scale = [1. 
/ stride for stride in cfg.UVRCNN.ROI_STRIDES] 19 | # self.roi_batch_size = cfg.UVRCNN.ROI_BATCH_SIZE # TODO 20 | 21 | head = registry.ROI_UV_HEADS[cfg.UVRCNN.ROI_UV_HEAD] 22 | self.Head = head(dim_in, self.spatial_scale) 23 | output = registry.ROI_UV_OUTPUTS[cfg.UVRCNN.ROI_UV_OUTPUT] 24 | self.Output = output(self.Head.dim_out) 25 | 26 | self.post_processor = uv_post_processor() 27 | self.loss_evaluator = uv_loss_evaluator() 28 | 29 | def forward(self, conv_features, proposals, targets=None): 30 | if self.training: 31 | return self._forward_train(conv_features, proposals, targets) 32 | else: 33 | return self._forward_test(conv_features, proposals) 34 | 35 | def _forward_train(self, conv_features, proposals, targets=None): 36 | all_proposals = proposals 37 | with torch.no_grad(): 38 | proposals = self.loss_evaluator.resample(proposals, targets) 39 | 40 | x = self.Head(conv_features, proposals) 41 | uv_logits = self.Output(x) 42 | 43 | loss_Upoints, loss_Vpoints, loss_seg_AnnIndex, loss_IndexUVPoints = self.loss_evaluator(uv_logits) 44 | loss_dict = dict(loss_Upoints=loss_Upoints, loss_Vpoints=loss_Vpoints, 45 | loss_seg_Ann=loss_seg_AnnIndex, loss_IPoints=loss_IndexUVPoints) 46 | 47 | return x, all_proposals, loss_dict 48 | 49 | def _forward_test(self, conv_features, proposals): 50 | x = self.Head(conv_features, proposals) 51 | uv_logits = self.Output(x) 52 | 53 | result = self.post_processor(uv_logits, proposals) 54 | return x, result, {} 55 | -------------------------------------------------------------------------------- /rcnn/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import ROIAlign 2 | from .roi_align import roi_align 3 | from .roi_pool import ROIPool 4 | from .roi_pool import roi_pool 5 | from .deform_pool import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack 6 | from .deform_pool import deform_roi_pooling 7 | -------------------------------------------------------------------------------- /rcnn/ops/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from models.ops import _C 8 | 9 | from apex import amp 10 | 11 | 12 | class _ROIAlign(Function): 13 | @staticmethod 14 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned): 15 | ctx.save_for_backward(roi) 16 | ctx.output_size = _pair(output_size) 17 | ctx.spatial_scale = spatial_scale 18 | ctx.sampling_ratio = sampling_ratio 19 | ctx.input_shape = input.size() 20 | ctx.aligned = aligned 21 | output = _C.roi_align_forward( 22 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned 23 | ) 24 | return output 25 | 26 | @staticmethod 27 | @once_differentiable 28 | def backward(ctx, grad_output): 29 | rois, = ctx.saved_tensors 30 | output_size = ctx.output_size 31 | spatial_scale = ctx.spatial_scale 32 | sampling_ratio = ctx.sampling_ratio 33 | bs, ch, h, w = ctx.input_shape 34 | grad_input = _C.roi_align_backward( 35 | grad_output, rois, spatial_scale, output_size[0], output_size[1], bs, ch, h, w, sampling_ratio, ctx.aligned 36 | ) 37 | return grad_input, None, None, None, None, None 38 | 39 | 40 | roi_align = _ROIAlign.apply 41 | 42 | 43 | class ROIAlign(nn.Module): 44 | def __init__(self, output_size, spatial_scale, sampling_ratio, aligned): 45 | 
super(ROIAlign, self).__init__() 46 | self.output_size = output_size 47 | self.spatial_scale = spatial_scale 48 | self.sampling_ratio = sampling_ratio 49 | self.aligned = aligned 50 | 51 | @amp.float_function 52 | def forward(self, input, rois): 53 | return roi_align( 54 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned 55 | ) 56 | 57 | def __repr__(self): 58 | tmpstr = self.__class__.__name__ + "(" 59 | tmpstr += "output_size=" + str(self.output_size) 60 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 61 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /rcnn/ops/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from models.ops import _C 8 | 9 | from apex import amp 10 | 11 | 12 | class _ROIPool(Function): 13 | @staticmethod 14 | def forward(ctx, input, roi, output_size, spatial_scale): 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.input_shape = input.size() 18 | output, argmax = _C.roi_pool_forward( 19 | input, roi, spatial_scale, output_size[0], output_size[1] 20 | ) 21 | ctx.save_for_backward(input, roi, argmax) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | input, rois, argmax = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | bs, ch, h, w = ctx.input_shape 31 | grad_input = _C.roi_pool_backward( 32 | grad_output, input, rois, argmax, spatial_scale, output_size[0], output_size[1], bs, ch, h, w, 33 | ) 34 | return grad_input, None, None, None 35 | 36 | 37 | roi_pool = _ROIPool.apply 38 | 39 | 40 | class ROIPool(nn.Module): 41 | def __init__(self, output_size, spatial_scale): 42 | super(ROIPool, self).__init__() 43 | self.output_size = output_size 44 | self.spatial_scale = spatial_scale 45 | 46 | @amp.float_function 47 | def forward(self, input, rois): 48 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 49 | 50 | def __repr__(self): 51 | tmpstr = self.__class__.__name__ + "(" 52 | tmpstr += "output_size=" + str(self.output_size) 53 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 54 | tmpstr += ")" 55 | return tmpstr 56 | -------------------------------------------------------------------------------- /rcnn/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/rcnn/utils/__init__.py -------------------------------------------------------------------------------- /rcnn/utils/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BalancedPositiveNegativeSampler(object): 5 | """ 6 | This class samples batches, ensuring that they contain a fixed proportion of positives 7 | """ 8 | 9 | def __init__(self, batch_size_per_image, positive_fraction): 10 | """ 11 | Arguments: 12 | batch_size_per_image (int): number of elements to be selected per image 13 | positive_fraction (float): percentage of positive elements per batch 14 | """ 15 | self.batch_size_per_image =
batch_size_per_image 16 | self.positive_fraction = positive_fraction 17 | 18 | def __call__(self, matched_idxs): 19 | """ 20 | Arguments: 21 | matched_idxs: list of tensors containing -1, 0 or positive values. 22 | Each tensor corresponds to a specific image. 23 | -1 values are ignored, 0 are considered as negatives and > 0 as 24 | positives. 25 | 26 | Returns: 27 | pos_idx (list[tensor]) 28 | neg_idx (list[tensor]) 29 | 30 | Returns two lists of binary masks for each image. 31 | The first list contains the positive elements that were selected, 32 | and the second list the negative examples. 33 | """ 34 | pos_idx = [] 35 | neg_idx = [] 36 | for matched_idxs_per_image in matched_idxs: 37 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 38 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 39 | 40 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 41 | # protect against not enough positive examples 42 | num_pos = min(positive.numel(), num_pos) 43 | num_neg = self.batch_size_per_image - num_pos 44 | # protect against not enough negative examples 45 | num_neg = min(negative.numel(), num_neg) 46 | 47 | # randomly select positive and negative examples 48 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 49 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 50 | 51 | pos_idx_per_image = positive[perm1] 52 | neg_idx_per_image = negative[perm2] 53 | 54 | # create binary mask from indices 55 | pos_idx_per_image_mask = torch.zeros_like( 56 | matched_idxs_per_image, dtype=torch.uint8 57 | ) 58 | neg_idx_per_image_mask = torch.zeros_like( 59 | matched_idxs_per_image, dtype=torch.uint8 60 | ) 61 | pos_idx_per_image_mask[pos_idx_per_image] = 1 62 | neg_idx_per_image_mask[neg_idx_per_image] = 1 63 | 64 | pos_idx.append(pos_idx_per_image_mask) 65 | neg_idx.append(neg_idx_per_image_mask) 66 | 67 | return pos_idx, neg_idx 68 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.13 2 | pyyaml>=3.12 3 | matplotlib 4 | opencv-python>=3.4.0 5 | setuptools 6 | Cython 7 | scipy 8 | six 9 | scikit-image 10 | Pillow>=6.1.0 11 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | """Add {PROJECT_ROOT}
to PYTHONPATH 2 | 3 | Usage: 4 | import this module before importing any project modules 5 | e.g. 6 | import _init_paths 7 | from core.config import cfg 8 | """ 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | 14 | def add_path(path): 15 | if path not in sys.path: 16 | sys.path.insert(0, path) 17 | 18 | 19 | this_dir = osp.abspath(osp.dirname(osp.dirname(__file__))) 20 | 21 | # Add the project root to PYTHONPATH 22 | lib_path = osp.join(this_dir) 23 | add_path(lib_path) 24 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import _init_paths  # pylint: disable=unused-import 5 | from utils.misc import mkdir_p, logging_rank 6 | 7 | from rcnn.core.config import cfg, merge_cfg_from_file, merge_cfg_from_list, assert_and_infer_cfg 8 | from rcnn.core.test_engine import run_inference 9 | 10 | # Parse arguments 11 | parser = argparse.ArgumentParser(description='Parsing R-CNN Model Testing') 12 | parser.add_argument('--cfg', dest='cfg_file', 13 | help='optional config file', 14 | default='./cfgs/CIHP/e2e_parsing_rcnn_R-50-FPN_1x_ms.yaml', type=str) 15 | parser.add_argument('--gpu_id', type=str, default='0,1,2,3,4,5,6,7', help='gpu id for evaluation') 16 | parser.add_argument('--range', help='start (inclusive) and end (exclusive) indices', type=int, nargs=2) 17 | parser.add_argument('opts', help='See rcnn/core/config.py for all options', 18 | default=None, 19 | nargs=argparse.REMAINDER) 20 | args = parser.parse_args() 21 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id 22 | 23 | 24 | def main(): 25 | if len(args.gpu_id.split(',')) == 1: 26 | local_rank = int(args.gpu_id.split(',')[0]) 27 | else: 28 | local_rank = -1 29 | args.local_rank = local_rank 30 | 31 | num_gpus = len(args.gpu_id.split(',')) 32 | multi_gpu_testing = True if num_gpus > 1 else False 33 | 34 | if args.cfg_file is not None: 35 | merge_cfg_from_file(args.cfg_file) 36 | if args.opts is not None: 37 | merge_cfg_from_list(args.opts) 38 | 39 | if not os.path.isdir(os.path.join(cfg.CKPT, 'test')): 40 | mkdir_p(os.path.join(cfg.CKPT, 'test')) 41 | if cfg.VIS.ENABLED: 42 | if not os.path.exists(os.path.join(cfg.CKPT, 'vis')): 43 | mkdir_p(os.path.join(cfg.CKPT, 'vis')) 44 | 45 | assert_and_infer_cfg(make_immutable=False) 46 | args.test_net_file, _ = os.path.splitext(__file__) 47 | run_inference( 48 | args, 49 | ind_range=args.range, 50 | multi_gpu_testing=multi_gpu_testing 51 | ) 52 | 53 | 54 | if __name__ == '__main__': 55 | main() 56 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/utils/__init__.py -------------------------------------------------------------------------------- /utils/collections.py: -------------------------------------------------------------------------------- 1 | class AttrDict(dict): 2 | 3 | IMMUTABLE = '__immutable__' 4 | 5 | def __init__(self, *args, **kwargs): 6 | super(AttrDict, self).__init__(*args, **kwargs) 7 | self.__dict__[AttrDict.IMMUTABLE] = False 8 | 9 | def __getattr__(self, name): 10 | if name in self.__dict__: 11 | return self.__dict__[name] 12 | elif name in self: 13 | return self[name] 14 | else: 15 | raise AttributeError(name) 16 | 17 | def __setattr__(self, name, value): 18 | if not
self.__dict__[AttrDict.IMMUTABLE]: 19 | if name in self.__dict__: 20 | self.__dict__[name] = value 21 | else: 22 | self[name] = value 23 | else: 24 | raise AttributeError( 25 | 'Attempted to set "{}" to "{}", but AttrDict is immutable'. 26 | format(name, value) 27 | ) 28 | 29 | def immutable(self, is_immutable): 30 | """Set immutability to is_immutable and recursively apply the setting 31 | to all nested AttrDicts. 32 | """ 33 | self.__dict__[AttrDict.IMMUTABLE] = is_immutable 34 | # Recursively set immutable state 35 | for v in self.__dict__.values(): 36 | if isinstance(v, AttrDict): 37 | v.immutable(is_immutable) 38 | for v in self.values(): 39 | if isinstance(v, AttrDict): 40 | v.immutable(is_immutable) 41 | 42 | def is_immutable(self): 43 | return self.__dict__[AttrDict.IMMUTABLE] 44 | -------------------------------------------------------------------------------- /utils/comm.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains primitives for multi-gpu communication. 3 | This is useful when doing distributed training. 4 | """ 5 | 6 | import pickle 7 | import time 8 | import numpy as np 9 | 10 | import torch 11 | import torch.distributed as dist 12 | 13 | 14 | def get_world_size(): 15 | if not dist.is_available(): 16 | return 1 17 | if not dist.is_initialized(): 18 | return 1 19 | return dist.get_world_size() 20 | 21 | 22 | def get_rank(): 23 | if not dist.is_available(): 24 | return 0 25 | if not dist.is_initialized(): 26 | return 0 27 | return dist.get_rank() 28 | 29 | 30 | def is_main_process(): 31 | return get_rank() == 0 32 | 33 | 34 | def synchronize(): 35 | """ 36 | Helper function to synchronize (barrier) among all processes when 37 | using distributed training 38 | """ 39 | if not dist.is_available(): 40 | return 41 | if not dist.is_initialized(): 42 | return 43 | world_size = dist.get_world_size() 44 | if world_size == 1: 45 | return 46 | dist.barrier() 47 | 48 | 49 | def all_gather(data): 50 | """ 51 | Run all_gather on arbitrary picklable data (not necessarily tensors) 52 | Args: 53 | data: any picklable object 54 | Returns: 55 | list[data]: list of data gathered from each rank 56 | """ 57 | world_size = get_world_size() 58 | if world_size == 1: 59 | return [data] 60 | 61 | # serialized to a Tensor 62 | buffer = pickle.dumps(data) 63 | storage = torch.ByteStorage.from_buffer(buffer) 64 | tensor = torch.ByteTensor(storage).to("cuda") 65 | 66 | # obtain Tensor size of each rank 67 | local_size = torch.LongTensor([tensor.numel()]).to("cuda") 68 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)] 69 | dist.all_gather(size_list, local_size) 70 | size_list = [int(size.item()) for size in size_list] 71 | max_size = max(size_list) 72 | 73 | # receiving Tensor from all ranks 74 | # we pad the tensor because torch all_gather does not support 75 | # gathering tensors of different shapes 76 | tensor_list = [] 77 | for _ in size_list: 78 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 79 | if local_size != max_size: 80 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 81 | tensor = torch.cat((tensor, padding), dim=0) 82 | dist.all_gather(tensor_list, tensor) 83 | 84 | data_list = [] 85 | for size, tensor in zip(size_list, tensor_list): 86 | buffer = tensor.cpu().numpy().tobytes()[:size] 87 | data_list.append(pickle.loads(buffer)) 88 | 89 | return data_list 90 | 91 | 92 | def shared_random_seed(): 93 | """ 94 | Returns: 95 | int: a random number that is the same 
across all workers. 96 | If workers need a shared RNG, they can use this shared seed to 97 | create one. 98 | 99 | All workers must call this function, otherwise it will deadlock. 100 | """ 101 | ints = np.random.randint(2 ** 31) 102 | all_ints = all_gather(ints) 103 | return all_ints[0] 104 | 105 | 106 | def reduce_dict(input_dict, average=True): 107 | """ 108 | Args: 109 | input_dict (dict): all the values will be reduced 110 | average (bool): whether to do average or sum 111 | Reduce the values in the dictionary from all processes so that process with rank 112 | 0 has the averaged results. Returns a dict with the same fields as 113 | input_dict, after reduction. 114 | """ 115 | world_size = get_world_size() 116 | if world_size < 2: 117 | return input_dict 118 | with torch.no_grad(): 119 | names = [] 120 | values = [] 121 | # sort the keys so that they are consistent across processes 122 | for k in sorted(input_dict.keys()): 123 | names.append(k) 124 | values.append(input_dict[k]) 125 | values = torch.stack(values, dim=0) 126 | dist.reduce(values, dst=0) 127 | if dist.get_rank() == 0 and average: 128 | # only main process gets accumulated, so only divide by 129 | # world_size in this case 130 | values /= world_size 131 | reduced_dict = {k: v for k, v in zip(names, values)} 132 | return reduced_dict 133 | -------------------------------------------------------------------------------- /utils/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/utils/data/__init__.py -------------------------------------------------------------------------------- /utils/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | from utils.data.structures.image_list import to_image_list 2 | 3 | 4 | class BatchCollator(object): 5 | """ 6 | From a list of samples from the dataset, 7 | returns the batched images and targets. 
8 | This should be passed to the DataLoader 9 | """ 10 | 11 | def __init__(self, size_divisible=0): 12 | self.size_divisible = size_divisible 13 | 14 | def __call__(self, batch): 15 | transposed_batch = list(zip(*batch)) 16 | images = to_image_list(transposed_batch[0], self.size_divisible) 17 | targets = transposed_batch[1] 18 | img_ids = transposed_batch[2] 19 | return images, targets, img_ids 20 | -------------------------------------------------------------------------------- /utils/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco import COCODataset 2 | from .concat_dataset import ConcatDataset 3 | -------------------------------------------------------------------------------- /utils/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | import bisect 2 | 3 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 4 | 5 | 6 | class ConcatDataset(_ConcatDataset): 7 | """ 8 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 9 | method for querying the sizes of the image 10 | """ 11 | 12 | def get_idxs(self, idx): 13 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 14 | if dataset_idx == 0: 15 | sample_idx = idx 16 | else: 17 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 18 | return dataset_idx, sample_idx 19 | 20 | def get_img_info(self, idx): 21 | dataset_idx, sample_idx = self.get_idxs(idx) 22 | return self.datasets[dataset_idx].get_img_info(sample_idx) 23 | -------------------------------------------------------------------------------- /utils/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed import DistributedSampler 2 | from .repeat_factor import RepeatFactorTrainingSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | from .range_sampler import RangeSampler 6 | -------------------------------------------------------------------------------- /utils/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.distributed as dist 4 | from torch.utils.data.sampler import Sampler 5 | 6 | 7 | class DistributedSampler(Sampler): 8 | """Sampler that restricts data loading to a subset of the dataset. 9 | It is especially useful in conjunction with 10 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 11 | process can pass a DistributedSampler instance as a DataLoader sampler, 12 | and load a subset of the original dataset that is exclusive to it. 13 | .. note:: 14 | Dataset is assumed to be of constant size. 15 | Arguments: 16 | dataset: Dataset used for sampling. 17 | num_replicas (optional): Number of processes participating in 18 | distributed training. 19 | rank (optional): Rank of the current process within num_replicas. 
20 | """ 21 | 22 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 23 | if num_replicas is None: 24 | if not dist.is_available(): 25 | raise RuntimeError("Requires distributed package to be available") 26 | num_replicas = dist.get_world_size() 27 | if rank is None: 28 | if not dist.is_available(): 29 | raise RuntimeError("Requires distributed package to be available") 30 | rank = dist.get_rank() 31 | self.dataset = dataset 32 | self.num_replicas = num_replicas 33 | self.rank = rank 34 | self.epoch = 0 35 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 36 | self.total_size = self.num_samples * self.num_replicas 37 | self.shuffle = shuffle 38 | 39 | def __iter__(self): 40 | if self.shuffle: 41 | # deterministically shuffle based on epoch 42 | g = torch.Generator() 43 | g.manual_seed(self.epoch) 44 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 45 | else: 46 | indices = torch.arange(len(self.dataset)).tolist() 47 | 48 | # add extra samples to make it evenly divisible 49 | indices += indices[: (self.total_size - len(indices))] 50 | assert len(indices) == self.total_size 51 | 52 | # subsample 53 | offset = self.num_samples * self.rank 54 | indices = indices[offset : offset + self.num_samples] 55 | assert len(indices) == self.num_samples 56 | 57 | return iter(indices) 58 | 59 | def __len__(self): 60 | return self.num_samples 61 | 62 | def set_epoch(self, epoch): 63 | self.epoch = epoch 64 | -------------------------------------------------------------------------------- /utils/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.sampler import BatchSampler 2 | 3 | 4 | class IterationBasedBatchSampler(BatchSampler): 5 | """ 6 | Wraps a BatchSampler, resampling from it until 7 | a specified number of iterations have been sampled 8 | """ 9 | 10 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 11 | self.batch_sampler = batch_sampler 12 | self.num_iterations = num_iterations 13 | self.start_iter = start_iter 14 | 15 | def __iter__(self): 16 | iteration = self.start_iter 17 | while iteration <= self.num_iterations: 18 | # if the underlying sampler has a set_epoch method, like 19 | # DistributedSampler, used for making each process see 20 | # a different split of the dataset, then set it 21 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 22 | self.batch_sampler.sampler.set_epoch(iteration) 23 | for batch in self.batch_sampler: 24 | iteration += 1 25 | if iteration > self.num_iterations: 26 | break 27 | yield batch 28 | 29 | def __len__(self): 30 | return self.num_iterations 31 | -------------------------------------------------------------------------------- /utils/data/samplers/range_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data.sampler import Sampler 3 | 4 | 5 | class RangeSampler(Sampler): 6 | def __init__(self, start_ind, end_ind): 7 | self.start_ind = start_ind 8 | self.end_ind = end_ind 9 | 10 | def __iter__(self): 11 | indices = torch.arange(self.start_ind, self.end_ind).tolist() 12 | return iter(indices) 13 | 14 | def __len__(self): 15 | return self.end_ind - self.start_ind 16 | -------------------------------------------------------------------------------- /utils/data/structures/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/utils/data/structures/__init__.py -------------------------------------------------------------------------------- /utils/data/structures/image_list.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ImageList(object): 5 | """ 6 | Structure that holds a list of images (of possibly 7 | varying sizes) as a single tensor. 8 | This works by padding the images to the same size, 9 | and storing in a field the original sizes of each image 10 | """ 11 | 12 | def __init__(self, tensors, image_sizes): 13 | """ 14 | Arguments: 15 | tensors (tensor) 16 | image_sizes (list[tuple[int, int]]) 17 | """ 18 | self.tensors = tensors 19 | self.image_sizes = image_sizes 20 | 21 | def to(self, *args, **kwargs): 22 | cast_tensor = self.tensors.to(*args, **kwargs) 23 | return ImageList(cast_tensor, self.image_sizes) 24 | 25 | 26 | def to_image_list(tensors, size_divisible=0): 27 | """ 28 | tensors can be an ImageList, a torch.Tensor or 29 | an iterable of Tensors. It can't be a numpy array. 30 | When tensors is an iterable of Tensors, it pads 31 | the Tensors with zeros so that they have the same 32 | shape 33 | """ 34 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 35 | tensors = [tensors] 36 | 37 | if isinstance(tensors, ImageList): 38 | return tensors 39 | elif isinstance(tensors, torch.Tensor): 40 | # single tensor shape can be inferred 41 | if tensors.dim() == 3: 42 | tensors = tensors[None] 43 | assert tensors.dim() == 4 44 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 45 | return ImageList(tensors, image_sizes) 46 | elif isinstance(tensors, (tuple, list)): 47 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 48 | 49 | # TODO Ideally, just remove this and let the model handle arbitrary 50 | # input sizes 51 | if size_divisible > 0: 52 | import math 53 | 54 | stride = size_divisible 55 | max_size = list(max_size) 56 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 57 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 58 | max_size = tuple(max_size) 59 | 60 | batch_shape = (len(tensors),) + max_size 61 | batched_imgs = tensors[0].new(*batch_shape).zero_() 62 | for img, pad_img in zip(tensors, batched_imgs): 63 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 64 | 65 | image_sizes = [im.shape[-2:] for im in tensors] 66 | 67 | return ImageList(batched_imgs, image_sizes) 68 | else: 69 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 70 | -------------------------------------------------------------------------------- /utils/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import * 2 | -------------------------------------------------------------------------------- /utils/image.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def aspect_ratio_rel(im, aspect_ratio): 6 | """Performs width-relative aspect ratio transformation.""" 7 | im_h, im_w = im.shape[:2] 8 | im_ar_w = int(round(aspect_ratio * im_w)) 9 | im_ar = cv2.resize(im, dsize=(im_ar_w, im_h)) 10 | return im_ar 11 | 12 | 13 | def aspect_ratio_abs(im, aspect_ratio): 14 | """Performs absolute aspect ratio transformation.""" 15 | im_h, im_w = im.shape[:2] 16 | im_area = im_h * im_w 17 | 18 | im_ar_w =
np.sqrt(im_area * aspect_ratio) 19 | im_ar_h = np.sqrt(im_area / aspect_ratio) 20 | assert np.isclose(im_ar_w / im_ar_h, aspect_ratio) 21 | 22 | im_ar = cv2.resize(im, dsize=(int(im_ar_w), int(im_ar_h))) 23 | return im_ar 24 | -------------------------------------------------------------------------------- /utils/misc.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import os 3 | import logging 4 | import numpy as np 5 | from six.moves import cPickle as pickle 6 | 7 | import torch 8 | import torch.distributed as dist 9 | 10 | # Set up logging and load config options 11 | logging.basicConfig(level=logging.INFO) 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | # logging only in rank 0 16 | def logging_rank(sstr, distributed=True, local_rank=0): 17 | if distributed and local_rank == 0: 18 | logger.info(sstr) 19 | elif not distributed: 20 | logger.info(sstr) 21 | return 0 22 | 23 | 24 | def get_mean_and_std(dataset): 25 | """Compute the mean and std value of dataset.""" 26 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2) 27 | 28 | mean = torch.zeros(3) 29 | std = torch.zeros(3) 30 | logger.info('Computing mean and std..') 31 | for inputs, targets in dataloader: 32 | for i in range(3): 33 | mean[i] += inputs[:, i, :, :].mean() 34 | std[i] += inputs[:, i, :, :].std() 35 | mean.div_(len(dataset)) 36 | std.div_(len(dataset)) 37 | return mean, std 38 | 39 | 40 | def mkdir_p(path): 41 | """make dir if not exist""" 42 | try: 43 | os.makedirs(path) 44 | except OSError as exc: # Python >2.5 45 | if exc.errno == errno.EEXIST and os.path.isdir(path): 46 | pass 47 | else: 48 | raise 49 | 50 | 51 | def save_object(obj, file_name): 52 | """Save a Python object by pickling it.""" 53 | file_name = os.path.abspath(file_name) 54 | with open(file_name, 'wb') as f: 55 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 56 | 57 | 58 | def get_world_size() -> int: 59 | if not dist.is_available(): 60 | return 1 61 | if not dist.is_initialized(): 62 | return 1 63 | return dist.get_world_size() 64 | -------------------------------------------------------------------------------- /utils/optimizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from utils.misc import logging_rank 5 | 6 | 7 | class Optimizer(object): 8 | def __init__(self, model, solver, local_rank=0): 9 | self.model = model 10 | self.solver = solver 11 | self.local_rank = local_rank 12 | 13 | self.bias_params_list = [] 14 | self.gn_params_list = [] 15 | self.nonbias_params_list = [] 16 | 17 | self.params = [] 18 | self.gn_param_nameset = self.get_gn_param_nameset() 19 | 20 | def get_gn_param_nameset(self): 21 | gn_param_nameset = set() 22 | for name, module in self.model.named_modules(): 23 | if isinstance(module, nn.GroupNorm): 24 | gn_param_nameset.add(name + '.weight') 25 | gn_param_nameset.add(name + '.bias') 26 | return gn_param_nameset 27 | 28 | def get_params_list(self): 29 | for key, value in self.model.named_parameters(): 30 | if value.requires_grad: 31 | if 'bias' in key: 32 | self.bias_params_list.append(value) 33 | elif key in self.gn_param_nameset: 34 | self.gn_params_list.append(value) 35 | else: 36 | self.nonbias_params_list.append(value) 37 | else: 38 | logging_rank('{} does not need grad.'.format(key), local_rank=self.local_rank) 39 | 40 | def get_params(self): 41 | self.params += [ 42 | {'params': self.nonbias_params_list, 43 | 'lr': 0, 44 | 
'weight_decay': self.solver.WEIGHT_DECAY, 45 | 'lr_scale': 1}, 46 | {'params': self.bias_params_list, 47 | 'lr': 0 * (self.solver.BIAS_DOUBLE_LR + 1), 48 | 'weight_decay': self.solver.WEIGHT_DECAY if self.solver.BIAS_WEIGHT_DECAY else 0, 49 | 'lr_scale': self.solver.BIAS_DOUBLE_LR + 1}, 50 | {'params': self.gn_params_list, 51 | 'lr': 0, 52 | 'weight_decay': self.solver.WEIGHT_DECAY_GN * self.solver.WEIGHT_DECAY, 53 | 'lr_scale': 1} 54 | ] 55 | 56 | def build(self): 57 | assert self.solver.OPTIMIZER in ['SGD', 'RMSPROP', 'ADAM'] 58 | self.get_params_list() 59 | self.get_params() 60 | 61 | if self.solver.OPTIMIZER == 'SGD': 62 | optimizer = torch.optim.SGD( 63 | self.params, 64 | momentum=self.solver.MOMENTUM 65 | ) 66 | elif self.solver.OPTIMIZER == 'RMSPROP': 67 | optimizer = torch.optim.RMSprop( 68 | self.params, 69 | momentum=self.solver.MOMENTUM 70 | ) 71 | elif self.solver.OPTIMIZER == 'ADAM': 72 | optimizer = torch.optim.Adam( 73 | self.model.parameters(), 74 | lr=self.solver.BASE_LR 75 | ) 76 | else: 77 | optimizer = None 78 | return optimizer 79 | -------------------------------------------------------------------------------- /utils/registry.py: -------------------------------------------------------------------------------- 1 | def _register_generic(module_dict, module_name, module): 2 | assert module_name not in module_dict 3 | module_dict[module_name] = module 4 | 5 | 6 | class Registry(dict): 7 | """ 8 | A helper class for managing module registration; it extends a dictionary 9 | and provides a register function. 10 | E.g. creating a registry: 11 | some_registry = Registry({"default": default_module}) 12 | There are two ways of registering new modules: 13 | 1): the normal way is just calling the register function: 14 | def foo(): 15 | ... 16 | some_registry.register("foo_module", foo) 17 | 2): used as a decorator when declaring the module: 18 | @some_registry.register("foo_module") 19 | @some_registry.register("foo_module_nickname") 20 | def foo(): 21 | ... 22 | Accessing a module is just like using a dictionary, e.g.: 23 | f = some_registry["foo_module"] 24 | """ 25 | def __init__(self, *args, **kwargs): 26 | super(Registry, self).__init__(*args, **kwargs) 27 | 28 | def register(self, module_name, module=None): 29 | # used as function call 30 | if module is not None: 31 | _register_generic(self, module_name, module) 32 | return 33 | 34 | # used as decorator 35 | def register_fn(fn): 36 | _register_generic(self, module_name, fn) 37 | return fn 38 | 39 | return register_fn 40 | 41 | -------------------------------------------------------------------------------- /utils/subprocess.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import subprocess 4 | import numpy as np 5 | from io import IOBase 6 | from six.moves import shlex_quote 7 | from six.moves import cPickle as pickle 8 | 9 | from utils.misc import logging_rank 10 | 11 | 12 | def process_in_parallel(tag, total_range_size, binary, cfg, ckpt_path): 13 | """Run the specified binary NUM_GPUS times in parallel, each time as a 14 | subprocess that uses one GPU. The binary must accept the command line 15 | arguments `--range {start} {end}` that specify a data processing range.
16 | """ 17 | # subprocesses 18 | cfg_file = os.path.join(ckpt_path, 'test', '{}_range_config.yaml'.format(tag)) 19 | with open(cfg_file, 'w') as f: 20 | yaml.dump(cfg, stream=f) 21 | subprocess_env = os.environ.copy() 22 | processes = [] 23 | # Determine GPUs to use 24 | cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES') 25 | if cuda_visible_devices: 26 | gpu_inds = list(map(int, cuda_visible_devices.split(','))) 27 | assert -1 not in gpu_inds, \ 28 | 'Hiding GPU indices using the \'-1\' index is not supported' 29 | else: 30 | raise NotImplementedError 31 | subinds = np.array_split(range(total_range_size), len(gpu_inds)) 32 | # Run the binary in cfg.NUM_GPUS subprocesses 33 | for i, gpu_ind in enumerate(gpu_inds): 34 | start = subinds[i][0] 35 | end = subinds[i][-1] + 1 36 | subprocess_env['CUDA_VISIBLE_DEVICES'] = str(gpu_ind) 37 | cmd = ('python {binary} --range {start} {end} --cfg {cfg_file} --gpu_id {gpu_id}') 38 | cmd = cmd.format( 39 | binary=shlex_quote(binary), 40 | start=int(start), 41 | end=int(end), 42 | cfg_file=shlex_quote(cfg_file), 43 | gpu_id=str(gpu_ind), 44 | ) 45 | logging_rank('{} range command {}: {}'.format(tag, i, cmd)) 46 | if i == 0: 47 | subprocess_stdout = subprocess.PIPE 48 | else: 49 | filename = os.path.join(ckpt_path, 'test', '%s_range_%s_%s.stdout' % (tag, start, end)) 50 | subprocess_stdout = open(filename, 'w') 51 | p = subprocess.Popen( 52 | cmd, 53 | shell=True, 54 | env=subprocess_env, 55 | stdout=subprocess_stdout, 56 | stderr=subprocess.STDOUT, 57 | bufsize=1 58 | ) 59 | processes.append((i, p, start, end, subprocess_stdout)) 60 | # Log output from inference processes and collate their results 61 | outputs = [] 62 | for i, p, start, end, subprocess_stdout in processes: 63 | log_subprocess_output(i, p, ckpt_path, tag, start, end) 64 | if isinstance(subprocess_stdout, IOBase): 65 | subprocess_stdout.close() 66 | range_file = os.path.join(ckpt_path, 'test', '%s_range_%s_%s.pkl' % (tag, start, end)) 67 | range_data = pickle.load(open(range_file, 'rb')) 68 | outputs.append(range_data) 69 | return outputs 70 | 71 | 72 | def log_subprocess_output(i, p, ckpt_path, tag, start, end): 73 | """Capture the output of each subprocess and log it in the parent process. 74 | The first subprocess's output is logged in realtime. The output from the 75 | other subprocesses is buffered and then printed all at once (in order) when 76 | subprocesses finish. 
77 | """ 78 | outfile = os.path.join(ckpt_path, 'test', '%s_range_%s_%s.stdout' % (tag, start, end)) 79 | logging_rank('# ' + '-' * 76 + ' #') 80 | logging_rank('stdout of subprocess %s with range [%s, %s]' % (i, start + 1, end)) 81 | logging_rank('# ' + '-' * 76 + ' #') 82 | if i == 0: 83 | # Stream the piped stdout from the first subprocess in realtime 84 | with open(outfile, 'w') as f: 85 | for line in iter(p.stdout.readline, b''): 86 | print(line.rstrip().decode('ascii')) 87 | f.write(str(line, encoding='ascii')) 88 | p.stdout.close() 89 | ret = p.wait() 90 | else: 91 | # For subprocesses >= 1, wait and dump their log file 92 | ret = p.wait() 93 | with open(outfile, 'r') as f: 94 | print(''.join(f.readlines())) 95 | assert ret == 0, 'Range subprocess failed (exit code: {})'.format(ret) 96 | -------------------------------------------------------------------------------- /utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | class Timer(object): 5 | """A simple timer.""" 6 | 7 | def __init__(self): 8 | self.reset() 9 | 10 | def tic(self): 11 | # using time.time instead of time.clock because time.clock 12 | # does not normalize for multithreading 13 | self.start_time = time.time() 14 | 15 | def toc(self, average=True): 16 | self.diff = time.time() - self.start_time 17 | self.total_time += self.diff 18 | self.calls += 1 19 | self.average_time = self.total_time / self.calls 20 | if average: 21 | return self.average_time 22 | else: 23 | return self.diff 24 | 25 | def reset(self): 26 | self.total_time = 0. 27 | self.calls = 0 28 | self.start_time = 0. 29 | self.diff = 0. 30 | self.average_time = 0. 31 | -------------------------------------------------------------------------------- /weights/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soeaver/Parsing-R-CNN/8c4d940dcd322bf7a8671f8b0faaabb3259bd384/weights/README.md --------------------------------------------------------------------------------
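A minimal usage sketch (not a file in the repository) of the BalancedPositiveNegativeSampler defined in rcnn/utils/balanced_positive_negative_sampler.py above. It assumes the repository root is on PYTHONPATH (for example via tools/_init_paths.py), and the matched_idxs values are made up for illustration.

import torch
from rcnn.utils.balanced_positive_negative_sampler import BalancedPositiveNegativeSampler

# Keep at most 4 proposals per image, half of them positive.
sampler = BalancedPositiveNegativeSampler(batch_size_per_image=4, positive_fraction=0.5)

# One tensor per image: -1 = ignore, 0 = negative, > 0 = positive (matched class).
matched_idxs = [torch.tensor([-1, 0, 0, 3, 7, 0, 1, 0])]

pos_masks, neg_masks = sampler(matched_idxs)
print(pos_masks[0].nonzero().squeeze(1))  # indices of the sampled positive proposals
print(neg_masks[0].nonzero().squeeze(1))  # indices of the sampled negative proposals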