├── .gitignore ├── .pylintrc ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── configs ├── e2e_keypoint_rcnn_R-50-FPN.yaml ├── e2e_keypoint_rcnn_R-50-FPN_1x.yaml ├── e2e_keypoint_rcnn_R-50-FPN_s1x.yaml ├── e2e_mask_rcnn_R-101-C4.yml ├── e2e_mask_rcnn_R-101-FPN_2x.yaml └── e2e_mask_rcnn_R-50-C4.yml ├── demo ├── 33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg ├── 33823288584_1d21cf0a26_k-detectron-R50-C4.jpg ├── 33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg ├── 33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg ├── 33823288584_1d21cf0a26_k.jpg ├── convert_pdf2img.sh ├── e2e_mask_rcnn_R-50-C4 │ └── train_from_scratch_epoch1_bs4 │ │ ├── img1.jpg │ │ ├── img2.jpg │ │ ├── img3.jpg │ │ └── img4.jpg ├── img1_keypoints-detectron-R50-FPN.jpg ├── img1_keypoints-pydetectron-R50-FPN.jpg ├── img2_keypoints-detectron-R50-FPN.jpg ├── img2_keypoints-pydetectron-R50-FPN.jpg ├── sample_images │ ├── img1.jpg │ ├── img2.jpg │ ├── img3.jpg │ └── img4.jpg └── sample_images_keypoints │ ├── img1_keypoints.jpg │ └── img2_keypoints.jpg ├── lib ├── __init__.py ├── core │ ├── __init__.py │ ├── config.py │ └── test.py ├── datasets │ ├── __init__.py │ ├── dataset_catalog.py │ ├── dummy_datasets.py │ ├── json_dataset.py │ └── roidb.py ├── make.sh ├── model │ ├── __init__.py │ ├── nms │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── nms │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── make.sh │ │ ├── nms_gpu.py │ │ ├── nms_kernel.cu │ │ ├── nms_wrapper.py │ │ └── src │ │ │ ├── nms_cuda.c │ │ │ ├── nms_cuda.h │ │ │ ├── nms_cuda_kernel.cu │ │ │ └── nms_cuda_kernel.h │ ├── roi_align │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_align │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── make.sh │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.c │ │ │ ├── roi_align_cuda.h │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel.h │ ├── roi_crop │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ ├── crop_resize │ │ │ │ ├── __init__.py │ │ │ │ └── _crop_resize.so │ │ │ └── roi_crop │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ ├── crop_resize.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ ├── make.sh │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ └── src │ │ │ ├── roi_crop.c │ │ │ ├── roi_crop.h │ │ │ ├── roi_crop_cuda.c │ │ │ ├── roi_crop_cuda.h │ │ │ ├── roi_crop_cuda_kernel.cu │ │ │ └── roi_crop_cuda_kernel.h │ ├── roi_pooling │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_pooling │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ └── src │ │ │ ├── roi_pooling.c │ │ │ ├── roi_pooling.h │ │ │ ├── roi_pooling_cuda.c │ │ │ ├── roi_pooling_cuda.h │ │ │ ├── roi_pooling_kernel.cu │ │ │ └── roi_pooling_kernel.h │ └── utils │ │ ├── .gitignore │ │ ├── __init__.py │ │ └── net_utils.py ├── modeling │ ├── FPN.py │ ├── ResNet.py │ ├── __init__.py │ ├── collect_and_distribute_fpn_rpn_proposals.py │ ├── fast_rcnn_heads.py │ ├── generate_anchors.py │ ├── generate_proposal_labels.py │ ├── generate_proposals.py │ ├── keypoint_rcnn_heads.py │ ├── mask_rcnn_heads.py │ ├── model_builder.py │ ├── roi_xfrom │ │ ├── __init__.py │ │ └── roi_align │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_align │ │ │ │ └── __init__.py │ │ 
│ ├── build.py │ │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ │ └── src │ │ │ ├── roi_align_cuda.c │ │ │ ├── roi_align_cuda.h │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel.h │ └── rpn_heads.py ├── nn │ ├── __init__.py │ ├── modules │ │ ├── __init__.py │ │ ├── affine.py │ │ └── upsample.py │ └── parallel │ │ ├── __init__.py │ │ ├── _functions.py │ │ ├── data_parallel.py │ │ ├── parallel_apply.py │ │ ├── replicate.py │ │ └── scatter_gather.py ├── roi_data │ ├── __init__.py │ ├── data_utils.py │ ├── fast_rcnn.py │ ├── keypoint_rcnn.py │ ├── loader.py │ ├── mask_rcnn.py │ ├── minibatch.py │ └── rpn.py ├── setup.py └── utils │ ├── __init__.py │ ├── blob.py │ ├── boxes.py │ ├── collections.py │ ├── colormap.py │ ├── cython_bbox.c │ ├── cython_bbox.pyx │ ├── cython_nms.c │ ├── cython_nms.pyx │ ├── detectron_weight_helper.py │ ├── env.py │ ├── fpn.py │ ├── keypoints.py │ ├── misc.py │ ├── net.py │ ├── resnet_weights_helper.py │ ├── segms.py │ ├── timer.py │ └── vis.py └── tools ├── _init_paths.py ├── download_imagenet_weights.py ├── infer_simple.py └── train_net.py /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | *.pyc 3 | *~ 4 | 5 | *.o 6 | *.so 7 | 8 | .ipynb_checkpoints 9 | notebooks/*.pkl 10 | 11 | /Outputs 12 | 13 | # ------------------------------ 14 | 15 | .vscode/* 16 | !.vscode/settings.json 17 | !.vscode/tasks.json 18 | !.vscode/launch.json 19 | !.vscode/extensions.json 20 | 21 | # General 22 | .DS_Store 23 | .AppleDouble 24 | .LSOverride 25 | 26 | # Icon must end with two \r 27 | Icon 28 | 29 | # Thumbnails 30 | ._* 31 | 32 | # Files that might appear in the root of a volume 33 | .DocumentRevisions-V100 34 | .fseventsd 35 | .Spotlight-V100 36 | .TemporaryItems 37 | .Trashes 38 | .VolumeIcon.icns 39 | .com.apple.timemachine.donotpresent 40 | 41 | # Directories potentially created on remote AFP share 42 | .AppleDB 43 | .AppleDesktop 44 | Network Trash Folder 45 | Temporary Items 46 | .apdisk 47 | 48 | *~ 49 | 50 | # temporary files which can be created if a process still has a handle open of a deleted file 51 | .fuse_hidden* 52 | 53 | # KDE directory preferences 54 | .directory 55 | 56 | # Linux trash folder which might appear on any partition or disk 57 | .Trash-* 58 | 59 | # .nfs files are created when an open file is removed but is still being accessed 60 | .nfs* 61 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | extension-pkg-whitelist=numpy,torch,cv2 3 | init-hook="sys.path.insert(0, './tools'); import _init_paths" 4 | 5 | [MESSAGES CONTROL] 6 | disable=wrong-import-position 7 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintEnabled": true, 3 | "python.linting.flake8Enabled": false, 4 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Roy Tseng 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without 
restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /configs/e2e_keypoint_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 130000 15 | STEPS: [0, 100000, 120000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | OUTPUT_DIR: . 
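# Worked example of the keypoint head sizing above (assumption: restating the file's own comment):
#   HEATMAP_SIZE = ROI_XFORM_RESOLUTION * UP_SCALE * 2 (deconv output) = 14 * 2 * 2 = 56
# This solver schedule (130k iterations, LR steps at 100k/120k) matches the _s1x variant below;
# the _1x variant shortens it to 90k iterations with steps at 60k/80k.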
50 | -------------------------------------------------------------------------------- /configs/e2e_keypoint_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | OUTPUT_DIR: . 50 | -------------------------------------------------------------------------------- /configs/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 130000 15 | STEPS: [0, 100000, 120000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | OUTPUT_DIR: . 
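# Relative to the _1x config above, this s1x schedule keeps every model, FPN, and keypoint-head
# setting identical and only stretches training: MAX_ITER 90000 -> 130000, with the LR decay
# steps moved from [0, 60000, 80000] to [0, 100000, 120000].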
50 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R-101-C4.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet101_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 360000 16 | STEPS: [0, 240000, 320000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | OUTPUT_DIR: . -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R-101-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | OUTPUT_DIR: . 
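# In this FPN config the RPN_PRE_NMS_TOP_N limits apply per FPN level (see the inline comments
# above), whereas the C4 configs in this repo use a single image-level limit
# (RPN_PRE_NMS_TOP_N: 6000 at test time). The "2x" schedule here is 180k iterations with LR
# steps at 120k/160k; the C4 mask configs instead train for 360k iterations at BASE_LR 0.01
# with IMS_PER_BATCH 1.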
45 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R-50-C4.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 360000 16 | STEPS: [0, 240000, 320000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | OUTPUT_DIR: . -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-detectron-R50-C4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/33823288584_1d21cf0a26_k-detectron-R50-C4.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/33823288584_1d21cf0a26_k.jpg -------------------------------------------------------------------------------- /demo/convert_pdf2img.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pdfdir='' 4 | 5 | while getopts 'd:' flag; do 6 | case "$flag" in 7 | d) pdfdir=$OPTARG ;; 8 | esac 9 | done 10 | 11 | for pdf in $(ls ${pdfdir}/img*.pdf); do 12 | fname="${pdf%.*}" 13 | convert -density 300x300 -quality 95 $pdf ${fname}.jpg 14 | done 15 | 
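# Example invocation (the demo/pdfs directory name is only illustrative):
#   bash demo/convert_pdf2img.sh -d demo/pdfs
# This converts every img*.pdf in the given directory into a 300-DPI, quality-95 JPEG next to
# it, using ImageMagick's `convert`.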
-------------------------------------------------------------------------------- /demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img1.jpg -------------------------------------------------------------------------------- /demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img2.jpg -------------------------------------------------------------------------------- /demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img3.jpg -------------------------------------------------------------------------------- /demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img4.jpg -------------------------------------------------------------------------------- /demo/img1_keypoints-detectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/img1_keypoints-detectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img1_keypoints-pydetectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/img1_keypoints-pydetectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img2_keypoints-detectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/img2_keypoints-detectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img2_keypoints-pydetectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/img2_keypoints-pydetectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/sample_images/img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/sample_images/img1.jpg -------------------------------------------------------------------------------- /demo/sample_images/img2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/sample_images/img2.jpg -------------------------------------------------------------------------------- /demo/sample_images/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/sample_images/img3.jpg -------------------------------------------------------------------------------- /demo/sample_images/img4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/sample_images/img4.jpg -------------------------------------------------------------------------------- /demo/sample_images_keypoints/img1_keypoints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/sample_images_keypoints/img1_keypoints.jpg -------------------------------------------------------------------------------- /demo/sample_images_keypoints/img2_keypoints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/sample_images_keypoints/img2_keypoints.jpg -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/__init__.py -------------------------------------------------------------------------------- /lib/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/core/__init__.py -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/datasets/__init__.py -------------------------------------------------------------------------------- /lib/datasets/dummy_datasets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | """Provide stub objects that can act as stand-in "dummy" datasets for simple use 16 | cases, like getting all classes in a dataset. 
This exists so that demos can be 17 | run without requiring users to download/install datasets first. 18 | """ 19 | 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | from __future__ import unicode_literals 24 | 25 | from utils.collections import AttrDict 26 | 27 | 28 | def get_coco_dataset(): 29 | """A dummy COCO dataset that includes only the 'classes' field.""" 30 | ds = AttrDict() 31 | classes = [ 32 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 33 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 34 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 35 | 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 36 | 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 37 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 38 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 39 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 40 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 41 | 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 42 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 43 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 44 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush' 45 | ] 46 | ds.classes = {i: name for i, name in enumerate(classes)} 47 | return ds 48 | -------------------------------------------------------------------------------- /lib/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | python setup.py build_ext --inplace 6 | rm -rf build 7 | 8 | # Choose cuda arch as you need 9 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \ 10 | -gencode arch=compute_35,code=sm_35 \ 11 | -gencode arch=compute_50,code=sm_50 \ 12 | -gencode arch=compute_52,code=sm_52 \ 13 | -gencode arch=compute_60,code=sm_60 \ 14 | -gencode arch=compute_61,code=sm_61 " 15 | # -gencode arch=compute_70,code=sm_70 " 16 | 17 | # compile NMS 18 | cd model/nms/src 19 | echo "Compiling nms kernels by nvcc..." 20 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \ 21 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 22 | 23 | cd ../ 24 | python build.py 25 | 26 | # compile roi_pooling 27 | cd ../../ 28 | cd model/roi_pooling/src 29 | echo "Compiling roi pooling kernels by nvcc..." 30 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \ 31 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 32 | cd ../ 33 | python build.py 34 | 35 | # # compile roi_align 36 | # cd ../../ 37 | # cd model/roi_align/src 38 | # echo "Compiling roi align kernels by nvcc..." 39 | # nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \ 40 | # -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 41 | # cd ../ 42 | # python build.py 43 | 44 | # compile roi_crop 45 | cd ../../ 46 | cd model/roi_crop/src 47 | echo "Compiling roi crop kernels by nvcc..." 48 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \ 49 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 50 | cd ../ 51 | python build.py 52 | 53 | # compile roi_align (based on Caffe2's implementation) 54 | cd ../../ 55 | cd modeling/roi_xfrom/roi_align/src 56 | echo "Compiling roi align kernels by nvcc..." 
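# The roi_align_kernel.cu.o object produced by the nvcc command below is linked into the Python
# FFI extension by the adjacent build.py -- presumably via extra_objects, as in
# lib/model/roi_align/build.py -- so this step must succeed before `python build.py` runs.
# The CUDA_ARCH gencode list defined at the top of this script controls which GPU architectures
# the kernel is built for.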
57 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \ 58 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 59 | cd ../ 60 | python build.py 61 | -------------------------------------------------------------------------------- /lib/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/__init__.py -------------------------------------------------------------------------------- /lib/model/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/model/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/nms/__init__.py -------------------------------------------------------------------------------- /lib/model/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/nms/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/nms/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/nms_cuda.c'] 16 | headers += ['src/nms_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/nms_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | print(extra_objects) 25 | 26 | ffi = create_extension( 27 | '_ext.nms', 28 | headers=headers, 29 | sources=sources, 30 | define_macros=defines, 31 | relative_to=__file__, 32 | with_cuda=with_cuda, 33 | extra_objects=extra_objects 34 | ) 35 | 36 | if __name__ == '__main__': 37 | ffi.build() 38 | -------------------------------------------------------------------------------- /lib/model/nms/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling stnm kernels by nvcc..." 
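# Note: unlike lib/make.sh (which builds for the whole CUDA_ARCH gencode list), this standalone
# script hard-codes -arch=sm_52 on the nvcc line below; adjust that flag if the NMS kernel needs
# to run on a different GPU architecture.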
7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /lib/model/nms/nms_gpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | import numpy as np 4 | from ._ext import nms 5 | import pdb 6 | 7 | def nms_gpu(dets, thresh): 8 | keep = dets.new(dets.size(0), 1).zero_().int() 9 | num_out = dets.new(1).zero_().int() 10 | nms.nms_cuda(keep, dets, num_out, thresh) 11 | keep = keep[:num_out[0]] 12 | return keep 13 | -------------------------------------------------------------------------------- /lib/model/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include 10 | #include 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 
| } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /lib/model/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | import torch 8 | from core.config import cfg 9 | from model.nms.nms_gpu import nms_gpu 10 | 11 | def nms(dets, thresh, force_cpu=False): 12 | """Dispatch to either CPU or GPU NMS implementations.""" 13 | if dets.shape[0] == 0: 14 | return [] 15 | # ---numpy version--- 16 | # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 17 | # ---pytorch version--- 18 | return nms_gpu(dets, thresh) 19 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "nms_cuda_kernel.h" 4 | 5 | // this symbol will be resolved automatically 
from PyTorch libs 6 | extern THCState *state; 7 | 8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 9 | THCudaIntTensor *num_out, float nms_overlap_thresh) { 10 | 11 | nms_cuda_compute(THCudaIntTensor_data(state, keep_out), 12 | THCudaIntTensor_data(state, num_out), 13 | THCudaTensor_data(state, boxes_host), 14 | boxes_host->size[0], 15 | boxes_host->size[1], 16 | nms_overlap_thresh); 17 | 18 | return 1; 19 | } 20 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out, 2 | // THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh); 3 | 4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 5 | THCudaIntTensor *num_out, float nms_overlap_thresh); 6 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "nms_cuda_kernel.h" 13 | 14 | #define CUDA_WARN(XXX) \ 15 | do { if (XXX != cudaSuccess) std::cout << "CUDA Error: " << \ 16 | cudaGetErrorString(XXX) << ", at line " << __LINE__ \ 17 | << std::endl; cudaDeviceSynchronize(); } while (0) 18 | 19 | #define CUDA_CHECK(condition) \ 20 | /* Code block avoids redefinition of cudaError_t error */ \ 21 | do { \ 22 | cudaError_t error = condition; \ 23 | if (error != cudaSuccess) { \ 24 | std::cout << cudaGetErrorString(error) << std::endl; \ 25 | } \ 26 | } while (0) 27 | 28 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 29 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 30 | 31 | __device__ inline float devIoU(float const * const a, float const * const b) { 32 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 33 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 34 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 35 | float interS = width * height; 36 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 37 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 38 | return interS / (Sa + Sb - interS); 39 | } 40 | 41 | __global__ void nms_kernel(int n_boxes, float nms_overlap_thresh, 42 | float *dev_boxes, unsigned long long *dev_mask) { 43 | const int row_start = blockIdx.y; 44 | const int col_start = blockIdx.x; 45 | 46 | // if (row_start > col_start) return; 47 | 48 | const int row_size = 49 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 50 | const int col_size = 51 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 52 | 53 | __shared__ float block_boxes[threadsPerBlock * 5]; 54 | if (threadIdx.x < col_size) { 55 | block_boxes[threadIdx.x * 5 + 0] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 57 | block_boxes[threadIdx.x * 5 + 1] = 58 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 59 | block_boxes[threadIdx.x * 5 + 2] = 60 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 61 | block_boxes[threadIdx.x * 5 + 3] = 62 | dev_boxes[(threadsPerBlock * 
col_start + threadIdx.x) * 5 + 3]; 63 | block_boxes[threadIdx.x * 5 + 4] = 64 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 65 | } 66 | __syncthreads(); 67 | 68 | if (threadIdx.x < row_size) { 69 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 70 | const float *cur_box = dev_boxes + cur_box_idx * 5; 71 | int i = 0; 72 | unsigned long long t = 0; 73 | int start = 0; 74 | if (row_start == col_start) { 75 | start = threadIdx.x + 1; 76 | } 77 | for (i = start; i < col_size; i++) { 78 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 79 | t |= 1ULL << i; 80 | } 81 | } 82 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 83 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 84 | } 85 | } 86 | 87 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 88 | int boxes_dim, float nms_overlap_thresh) { 89 | 90 | float* boxes_dev = NULL; 91 | unsigned long long* mask_dev = NULL; 92 | 93 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 94 | 95 | CUDA_CHECK(cudaMalloc(&boxes_dev, 96 | boxes_num * boxes_dim * sizeof(float))); 97 | CUDA_CHECK(cudaMemcpy(boxes_dev, 98 | boxes_host, 99 | boxes_num * boxes_dim * sizeof(float), 100 | cudaMemcpyHostToDevice)); 101 | 102 | CUDA_CHECK(cudaMalloc(&mask_dev, 103 | boxes_num * col_blocks * sizeof(unsigned long long))); 104 | 105 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 106 | DIVUP(boxes_num, threadsPerBlock)); 107 | dim3 threads(threadsPerBlock); 108 | 109 | // printf("i am at line %d\n", boxes_num); 110 | // printf("i am at line %d\n", boxes_dim); 111 | 112 | nms_kernel<<>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector remv(col_blocks); 124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | // we need to create a memory for keep_out on cpu 127 | // otherwise, the following code cannot run 128 | 129 | int* keep_out_cpu = new int[boxes_num]; 130 | 131 | int num_to_keep = 0; 132 | for (int i = 0; i < boxes_num; i++) { 133 | int nblock = i / threadsPerBlock; 134 | int inblock = i % threadsPerBlock; 135 | 136 | if (!(remv[nblock] & (1ULL << inblock))) { 137 | // orignal: keep_out[num_to_keep++] = i; 138 | keep_out_cpu[num_to_keep++] = i; 139 | unsigned long long *p = &mask_host[0] + i * col_blocks; 140 | for (int j = nblock; j < col_blocks; j++) { 141 | remv[j] |= p[j]; 142 | } 143 | } 144 | } 145 | 146 | // copy keep_out_cpu to keep_out on gpu 147 | CUDA_WARN(cudaMemcpy(keep_out, keep_out_cpu, boxes_num * sizeof(int),cudaMemcpyHostToDevice)); 148 | 149 | // *num_out = num_to_keep; 150 | 151 | // original: *num_out = num_to_keep; 152 | // copy num_to_keep to num_out on gpu 153 | 154 | CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),cudaMemcpyHostToDevice)); 155 | 156 | // release cuda memory 157 | CUDA_CHECK(cudaFree(boxes_dev)); 158 | CUDA_CHECK(cudaFree(mask_dev)); 159 | // release cpu memory 160 | delete []keep_out_cpu; 161 | } 162 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int 
boxes_num, 6 | int boxes_dim, float nms_overlap_thresh); 7 | 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | -------------------------------------------------------------------------------- /lib/model/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/roi_align/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | # sources = ['src/roi_align.c'] 7 | # headers = ['src/roi_align.h'] 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_align_cuda.c'] 16 | headers += ['src/roi_align_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_align_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_align', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/model/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_align 4 | 5 | 6 | # TODO use save_for_backward instead 7 | class RoIAlignFunction(Function): 8 | def __init__(self, aligned_height, aligned_width, spatial_scale): 9 | self.aligned_width = int(aligned_width) 10 | self.aligned_height = int(aligned_height) 11 | self.spatial_scale = float(spatial_scale) 12 | self.rois = None 13 | self.feature_size = 
None 14 | 15 | def forward(self, features, rois): 16 | self.rois = rois 17 | self.feature_size = features.size() 18 | 19 | batch_size, num_channels, data_height, data_width = features.size() 20 | num_rois = rois.size(0) 21 | 22 | output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_() 23 | if features.is_cuda: 24 | roi_align.roi_align_forward_cuda(self.aligned_height, 25 | self.aligned_width, 26 | self.spatial_scale, features, 27 | rois, output) 28 | else: 29 | raise NotImplementedError 30 | 31 | return output 32 | 33 | def backward(self, grad_output): 34 | assert(self.feature_size is not None and grad_output.is_cuda) 35 | 36 | batch_size, num_channels, data_height, data_width = self.feature_size 37 | 38 | grad_input = self.rois.new(batch_size, num_channels, data_height, 39 | data_width).zero_() 40 | roi_align.roi_align_backward_cuda(self.aligned_height, 41 | self.aligned_width, 42 | self.spatial_scale, grad_output, 43 | self.rois, grad_input) 44 | 45 | # print grad_input 46 | 47 | return grad_input, None 48 | -------------------------------------------------------------------------------- /lib/model/roi_align/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling my_lib kernels by nvcc..." 7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /lib/model/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 16 | self.spatial_scale)(features, rois) 17 | 18 | class RoIAlignAvg(Module): 19 | def __init__(self, aligned_height, aligned_width, spatial_scale): 20 | super(RoIAlignAvg, self).__init__() 21 | 22 | self.aligned_width = int(aligned_width) 23 | self.aligned_height = int(aligned_height) 24 | self.spatial_scale = float(spatial_scale) 25 | 26 | def forward(self, features, rois): 27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 28 | self.spatial_scale)(features, rois) 29 | return avg_pool2d(x, kernel_size=2, stride=1) 30 | 31 | class RoIAlignMax(Module): 32 | def __init__(self, aligned_height, aligned_width, spatial_scale): 33 | super(RoIAlignMax, self).__init__() 34 | 35 | self.aligned_width = int(aligned_width) 36 | self.aligned_height = int(aligned_height) 37 | self.spatial_scale = float(spatial_scale) 38 | 39 | def forward(self, features, rois): 40 | x = RoIAlignFunction(self.aligned_height+1, 
self.aligned_width+1, 41 | self.spatial_scale)(features, rois) 42 | return max_pool2d(x, kernel_size=2, stride=1) 43 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // data height 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | // data width 27 | int data_width = THCudaTensor_size(state, features, 3); 28 | // Number of channels 29 | int num_channels = THCudaTensor_size(state, features, 1); 30 | 31 | cudaStream_t stream = THCState_getCurrentStream(state); 32 | 33 | ROIAlignForwardLaucher( 34 | data_flat, spatial_scale, num_rois, data_height, 35 | data_width, num_channels, aligned_height, 36 | aligned_width, rois_flat, 37 | output_flat, stream); 38 | 39 | return 1; 40 | } 41 | 42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width, num_channels, aligned_height, 72 | aligned_width, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_kernel.h: 
-------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /lib/model/roi_crop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_crop/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_crop/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/_ext/crop_resize/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._crop_resize import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | locals[symbol] = _wrap_function(fn, _ffi) 10 | __all__.append(symbol) 11 | 12 | _import_symbols(locals()) 13 | -------------------------------------------------------------------------------- /lib/model/roi_crop/_ext/crop_resize/_crop_resize.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_crop/_ext/crop_resize/_crop_resize.so -------------------------------------------------------------------------------- /lib/model/roi_crop/_ext/roi_crop/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_crop import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | 
locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/roi_crop/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = ['src/roi_crop.c'] 9 | headers = ['src/roi_crop.h'] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_crop_cuda.c'] 16 | headers += ['src/roi_crop_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_crop', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_crop/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/crop_resize.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | from cffi import FFI 6 | ffi = FFI() 7 | 8 | class RoICropFunction(Function): 9 | def forward(self, input1, input2): 10 | self.input1 = input1 11 | self.input2 = input2 12 | self.device_c = ffi.new("int *") 13 | output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]) 14 | #print('decice %d' % torch.cuda.current_device()) 15 | if input1.is_cuda: 16 | self.device = torch.cuda.current_device() 17 | else: 18 | self.device = -1 19 | self.device_c[0] = self.device 20 | if not input1.is_cuda: 21 | roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output) 22 | else: 23 | output = output.cuda(self.device) 24 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 25 | return output 26 | 27 | def backward(self, grad_output): 28 | grad_input1 = torch.zeros(self.input1.size()) 29 | grad_input2 = torch.zeros(self.input2.size()) 30 | #print('backward decice %d' % self.device) 31 | if not grad_output.is_cuda: 32 | roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output) 33 | else: 34 | grad_input1 = grad_input1.cuda(self.device) 35 | grad_input2 = grad_input2.cuda(self.device) 36 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 37 | return grad_input1, grad_input2 38 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/gridgen.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 
| from torch.autograd import Function 4 | import numpy as np 5 | 6 | 7 | class AffineGridGenFunction(Function): 8 | def __init__(self, height, width,lr=1): 9 | super(AffineGridGenFunction, self).__init__() 10 | self.lr = lr 11 | self.height, self.width = height, width 12 | self.grid = np.zeros( [self.height, self.width, 3], dtype=np.float32) 13 | self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats = self.width, axis = 0).T, 0) 14 | self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats = self.height, axis = 0), 0) 15 | # self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height - 1)), 0), repeats = self.width, axis = 0).T, 0) 16 | # self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width - 1)), 0), repeats = self.height, axis = 0), 0) 17 | self.grid[:,:,2] = np.ones([self.height, width]) 18 | self.grid = torch.from_numpy(self.grid.astype(np.float32)) 19 | #print(self.grid) 20 | 21 | def forward(self, input1): 22 | self.input1 = input1 23 | output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 24 | self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 25 | for i in range(input1.size(0)): 26 | self.batchgrid[i] = self.grid.astype(self.batchgrid[i]) 27 | 28 | # if input1.is_cuda: 29 | # self.batchgrid = self.batchgrid.cuda() 30 | # output = output.cuda() 31 | 32 | for i in range(input1.size(0)): 33 | output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2) 34 | 35 | return output 36 | 37 | def backward(self, grad_output): 38 | 39 | grad_input1 = self.input1.new(self.input1.size()).zero_() 40 | 41 | # if grad_output.is_cuda: 42 | # self.batchgrid = self.batchgrid.cuda() 43 | # grad_input1 = grad_input1.cuda() 44 | 45 | grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1,2), self.batchgrid.view(-1, self.height*self.width, 3)) 46 | return grad_input1 47 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/roi_crop.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | import pdb 6 | 7 | class RoICropFunction(Function): 8 | def forward(self, input1, input2): 9 | self.input1 = input1.clone() 10 | self.input2 = input2.clone() 11 | output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_() 12 | assert output.get_device() == input1.get_device(), "output and input1 must on the same device" 13 | assert output.get_device() == input2.get_device(), "output and input2 must on the same device" 14 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 15 | return output 16 | 17 | def backward(self, grad_output): 18 | grad_input1 = self.input1.new(self.input1.size()).zero_() 19 | grad_input2 = self.input2.new(self.input2.size()).zero_() 20 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 21 | return grad_input1, grad_input2 22 | -------------------------------------------------------------------------------- /lib/model/roi_crop/make.sh: -------------------------------------------------------------------------------- 1 
| #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling my_lib kernels by nvcc..." 7 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /lib/model/roi_crop/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_crop/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/modules/roi_crop.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_crop import RoICropFunction 3 | 4 | class _RoICrop(Module): 5 | def __init__(self, layout = 'BHWD'): 6 | super(_RoICrop, self).__init__() 7 | def forward(self, input1, input2): 8 | return RoICropFunction()(input1, input2) 9 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop.h: -------------------------------------------------------------------------------- 1 | int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 2 | 3 | int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 4 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 5 | 6 | 7 | 8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 9 | 10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 11 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 12 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "roi_crop_cuda_kernel.h" 5 | 6 | #define real float 7 | 8 | // this symbol will be resolved automatically from PyTorch libs 9 | extern THCState *state; 10 | 11 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 12 | // we assume BHWD format in inputImages 13 | // we assume BHW(YX) format on grids 14 | 15 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output){ 16 | // THCState *state = getCutorchState(L); 17 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor"); 18 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor"); 19 | // THCudaTensor *output = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor"); 20 | 21 | int success = 0; 22 | success = BilinearSamplerBHWD_updateOutput_cuda_kernel(output->size[1], 23 | output->size[3], 24 | output->size[2], 25 | output->size[0], 26 | THCudaTensor_size(state, inputImages, 1), 27 | THCudaTensor_size(state, inputImages, 2), 28 | THCudaTensor_size(state, inputImages, 3), 29 | THCudaTensor_size(state, inputImages, 0), 30 | THCudaTensor_data(state, inputImages), 31 | THCudaTensor_stride(state, inputImages, 0), 32 | THCudaTensor_stride(state, inputImages, 1), 33 | THCudaTensor_stride(state, inputImages, 2), 34 | THCudaTensor_stride(state, inputImages, 3), 35 | 
THCudaTensor_data(state, grids), 36 | THCudaTensor_stride(state, grids, 0), 37 | THCudaTensor_stride(state, grids, 3), 38 | THCudaTensor_stride(state, grids, 1), 39 | THCudaTensor_stride(state, grids, 2), 40 | THCudaTensor_data(state, output), 41 | THCudaTensor_stride(state, output, 0), 42 | THCudaTensor_stride(state, output, 1), 43 | THCudaTensor_stride(state, output, 2), 44 | THCudaTensor_stride(state, output, 3), 45 | THCState_getCurrentStream(state)); 46 | 47 | //check for errors 48 | if (!success) { 49 | THError("aborting"); 50 | } 51 | return 1; 52 | } 53 | 54 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 55 | THCudaTensor *gradGrids, THCudaTensor *gradOutput) 56 | { 57 | // THCState *state = getCutorchState(L); 58 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor"); 59 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor"); 60 | // THCudaTensor *gradInputImages = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor"); 61 | // THCudaTensor *gradGrids = (THCudaTensor *)luaT_checkudata(L, 5, "torch.CudaTensor"); 62 | // THCudaTensor *gradOutput = (THCudaTensor *)luaT_checkudata(L, 6, "torch.CudaTensor"); 63 | 64 | int success = 0; 65 | success = BilinearSamplerBHWD_updateGradInput_cuda_kernel(gradOutput->size[1], 66 | gradOutput->size[3], 67 | gradOutput->size[2], 68 | gradOutput->size[0], 69 | THCudaTensor_size(state, inputImages, 1), 70 | THCudaTensor_size(state, inputImages, 2), 71 | THCudaTensor_size(state, inputImages, 3), 72 | THCudaTensor_size(state, inputImages, 0), 73 | THCudaTensor_data(state, inputImages), 74 | THCudaTensor_stride(state, inputImages, 0), 75 | THCudaTensor_stride(state, inputImages, 1), 76 | THCudaTensor_stride(state, inputImages, 2), 77 | THCudaTensor_stride(state, inputImages, 3), 78 | THCudaTensor_data(state, grids), 79 | THCudaTensor_stride(state, grids, 0), 80 | THCudaTensor_stride(state, grids, 3), 81 | THCudaTensor_stride(state, grids, 1), 82 | THCudaTensor_stride(state, grids, 2), 83 | THCudaTensor_data(state, gradInputImages), 84 | THCudaTensor_stride(state, gradInputImages, 0), 85 | THCudaTensor_stride(state, gradInputImages, 1), 86 | THCudaTensor_stride(state, gradInputImages, 2), 87 | THCudaTensor_stride(state, gradInputImages, 3), 88 | THCudaTensor_data(state, gradGrids), 89 | THCudaTensor_stride(state, gradGrids, 0), 90 | THCudaTensor_stride(state, gradGrids, 3), 91 | THCudaTensor_stride(state, gradGrids, 1), 92 | THCudaTensor_stride(state, gradGrids, 2), 93 | THCudaTensor_data(state, gradOutput), 94 | THCudaTensor_stride(state, gradOutput, 0), 95 | THCudaTensor_stride(state, gradOutput, 1), 96 | THCudaTensor_stride(state, gradOutput, 2), 97 | THCudaTensor_stride(state, gradOutput, 3), 98 | THCState_getCurrentStream(state)); 99 | 100 | //check for errors 101 | if (!success) { 102 | THError("aborting"); 103 | } 104 | return 1; 105 | } 106 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda.h: -------------------------------------------------------------------------------- 1 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 2 | // we assume BHWD format in inputImages 3 | // we assume BHW(YX) format on grids 4 | 5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output); 6 | 7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, 
THCudaTensor *grids, THCudaTensor *gradInputImages, 8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput); 9 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | 6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc, 7 | /*output->size[2]*/int ow, 8 | /*output->size[1]*/int oh, 9 | /*output->size[0]*/int ob, 10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 11 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 16 | /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw, 17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 18 | 19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc, 20 | /*gradOutput->size[2]*/int gow, 21 | /*gradOutput->size[1]*/int goh, 22 | /*gradOutput->size[0]*/int gob, 23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 26 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 27 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 28 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 29 | /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw, 30 | /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw, 31 | /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw, 32 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 33 | 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_pooling/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/build.py: 
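As with the other extensions, _import_symbols simply re-exports every callable from the compiled ._roi_pooling FFI library, so callers can use the C entry points as ordinary Python functions. Illustrative only, assuming the extension has been built with the build.py that follows:

from model.roi_pooling._ext import roi_pooling

# functions/roi_pool.py (below) then calls, for example,
#   roi_pooling.roi_pooling_forward_cuda(pooled_h, pooled_w, spatial_scale,
#                                        features, rois, output, argmax)
# with output and argmax pre-allocated on the same CUDA device as features.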
-------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | 7 | sources = ['src/roi_pooling.c'] 8 | headers = ['src/roi_pooling.h'] 9 | defines = [] 10 | with_cuda = False 11 | 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/roi_pooling_cuda.c'] 15 | headers += ['src/roi_pooling_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | with_cuda = True 18 | 19 | this_file = os.path.dirname(os.path.realpath(__file__)) 20 | print(this_file) 21 | extra_objects = ['src/roi_pooling.cu.o'] 22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 23 | 24 | ffi = create_extension( 25 | '_ext.roi_pooling', 26 | headers=headers, 27 | sources=sources, 28 | define_macros=defines, 29 | relative_to=__file__, 30 | with_cuda=with_cuda, 31 | extra_objects=extra_objects 32 | ) 33 | 34 | if __name__ == '__main__': 35 | ffi.build() 36 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_pooling/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_pooling 4 | import pdb 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(ctx, pooled_height, pooled_width, spatial_scale): 8 | ctx.pooled_width = pooled_width 9 | ctx.pooled_height = pooled_height 10 | ctx.spatial_scale = spatial_scale 11 | ctx.feature_size = None 12 | 13 | def forward(ctx, features, rois): 14 | ctx.feature_size = features.size() 15 | batch_size, num_channels, data_height, data_width = ctx.feature_size 16 | num_rois = rois.size(0) 17 | output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_() 18 | ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int() 19 | ctx.rois = rois 20 | if not features.is_cuda: 21 | _features = features.permute(0, 2, 3, 1) 22 | roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 23 | _features, rois, output) 24 | else: 25 | roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 26 | features, rois, output, ctx.argmax) 27 | 28 | return output 29 | 30 | def backward(ctx, grad_output): 31 | assert(ctx.feature_size is not None and grad_output.is_cuda) 32 | batch_size, num_channels, data_height, data_width = ctx.feature_size 33 | grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_() 34 | 35 | roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 36 | grad_output, ctx.rois, grad_input, ctx.argmax) 37 | 38 | return grad_input, None 39 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- 
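RoIPoolFunction mirrors the classic ROIPool op: the forward pass records, for every output cell, the argmax position inside its RoI bin so the CUDA backward can route gradients only to those winning inputs. A minimal sketch using the _RoIPooling module defined just below (illustrative; it assumes the built CUDA extension and that lib/ is importable):

import torch
from torch.autograd import Variable
from model.roi_pooling.modules.roi_pool import _RoIPooling

features = Variable(torch.randn(1, 256, 50, 50).cuda(), requires_grad=True)
rois = Variable(torch.cuda.FloatTensor([[0, 0.0, 0.0, 160.0, 160.0]]))  # [batch_idx, x1, y1, x2, y2]

pool = _RoIPooling(pooled_height=7, pooled_width=7, spatial_scale=1.0 / 16.0)
out = pool(features, rois)   # -> 1 x 256 x 7 x 7
out.sum().backward()         # gradients flow only into the per-bin argmax locations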
https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import RoIPoolFunction 3 | 4 | 5 | class _RoIPooling(Module): 6 | def __init__(self, pooled_height, pooled_width, spatial_scale): 7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 15 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Set all element of the output tensor to -inf. 
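/* Note: the fill below uses -1 rather than -inf as the comment above suggests.
 * Empty bins are later overwritten with 0, and a non-empty bin keeps -1 only if
 * every feature value in it is below -1, so the difference from -inf rarely
 * matters in practice.
 *
 * Worked example of the bin arithmetic (illustrative numbers): with
 * spatial_scale = 1/16, an RoI (x1, y1, x2, y2) = (0, 0, 96, 96) maps to a
 * 7x7 window on the feature map (roi_width = roi_height = 6 - 0 + 1 = 7), so
 * for pooled_height = pooled_width = 7 each bin covers exactly one cell:
 * bin_size_h = bin_size_w = 7 / 7 = 1.0. */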
30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output += pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = 
THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = THCudaTensor_size(state, features, 0); 27 | // if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const 
int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /lib/model/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/utils/__init__.py -------------------------------------------------------------------------------- /lib/model/utils/net_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | import torchvision.models as models 7 | from core.config import cfg 8 | from model.roi_crop.functions.roi_crop import RoICropFunction 9 | import cv2 10 | import pdb 11 | import random 12 | 13 | def save_net(fname, net): 14 | import h5py 15 | h5f = h5py.File(fname, mode='w') 16 | for k, v in net.state_dict().items(): 17 | h5f.create_dataset(k, data=v.cpu().numpy()) 18 | 19 | def load_net(fname, net): 20 | import h5py 21 | h5f = h5py.File(fname, mode='r') 22 | for k, v in net.state_dict().items(): 23 | param = torch.from_numpy(np.asarray(h5f[k])) 24 | v.copy_(param) 25 | 26 | def weights_normal_init(model, dev=0.01): 27 | if isinstance(model, list): 28 | for m in model: 29 | weights_normal_init(m, dev) 30 | else: 31 | for m in model.modules(): 32 | if isinstance(m, nn.Conv2d): 33 | m.weight.data.normal_(0.0, dev) 34 | elif isinstance(m, nn.Linear): 35 | m.weight.data.normal_(0.0, dev) 36 | 37 | 38 | def _crop_pool_layer(bottom, rois, max_pool=True): 39 | # code modified from 40 | # https://github.com/ruotianluo/pytorch-faster-rcnn 41 | # implement it using stn 42 | # box to affine 43 | # input (x1,y1,x2,y2) 44 | """ 45 | [ x2-x1 x1 + x2 - W + 1 ] 46 | [ ----- 0 --------------- ] 47 | [ W - 1 W - 1 ] 48 | [ ] 49 | [ y2-y1 y1 + y2 - H + 1 ] 50 | [ 0 ----- --------------- ] 51 | [ H - 1 H - 1 ] 52 | """ 53 | rois = rois.detach() 54 | batch_size = bottom.size(0) 55 | D = bottom.size(1) 56 | H = bottom.size(2) 57 | W = bottom.size(3) 58 | roi_per_batch = rois.size(0) / batch_size 59 | x1 = rois[:, 1::4] / 16.0 60 | y1 = rois[:, 2::4] / 16.0 61 | x2 = rois[:, 3::4] / 16.0 62 | y2 = rois[:, 4::4] / 16.0 63 | 64 | height = bottom.size(2) 65 | width = bottom.size(3) 66 | 67 | # affine theta 68 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 69 | theta = torch.cat([\ 70 | (x2 - x1) / (width - 1), 71 | zero, 72 | (x1 + x2 - width + 1) / (width - 1), 73 | zero, 74 | (y2 - y1) / (height - 1), 75 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 76 | 77 | if max_pool: 78 | pre_pool_size = cfg.POOLING_SIZE * 2 79 | grid = 
F.affine_grid(theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size))) 80 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\ 81 | .contiguous().view(-1, D, H, W) 82 | crops = F.grid_sample(bottom, grid) 83 | crops = F.max_pool2d(crops, 2, 2) 84 | else: 85 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, cfg.POOLING_SIZE, cfg.POOLING_SIZE))) 86 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\ 87 | .contiguous().view(-1, D, H, W) 88 | crops = F.grid_sample(bottom, grid) 89 | 90 | return crops, grid 91 | 92 | def _affine_grid_gen(rois, input_size, grid_size): 93 | 94 | rois = rois.detach() 95 | x1 = rois[:, 1::4] / 16.0 96 | y1 = rois[:, 2::4] / 16.0 97 | x2 = rois[:, 3::4] / 16.0 98 | y2 = rois[:, 4::4] / 16.0 99 | 100 | height = input_size[0] 101 | width = input_size[1] 102 | 103 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 104 | theta = torch.cat([\ 105 | (x2 - x1) / (width - 1), 106 | zero, 107 | (x1 + x2 - width + 1) / (width - 1), 108 | zero, 109 | (y2 - y1) / (height - 1), 110 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 111 | 112 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, grid_size, grid_size))) 113 | 114 | return grid 115 | 116 | def _affine_theta(rois, input_size): 117 | 118 | rois = rois.detach() 119 | x1 = rois[:, 1::4] / 16.0 120 | y1 = rois[:, 2::4] / 16.0 121 | x2 = rois[:, 3::4] / 16.0 122 | y2 = rois[:, 4::4] / 16.0 123 | 124 | height = input_size[0] 125 | width = input_size[1] 126 | 127 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 128 | 129 | # theta = torch.cat([\ 130 | # (x2 - x1) / (width - 1), 131 | # zero, 132 | # (x1 + x2 - width + 1) / (width - 1), 133 | # zero, 134 | # (y2 - y1) / (height - 1), 135 | # (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 136 | 137 | theta = torch.cat([\ 138 | (y2 - y1) / (height - 1), 139 | zero, 140 | (y1 + y2 - height + 1) / (height - 1), 141 | zero, 142 | (x2 - x1) / (width - 1), 143 | (x1 + x2 - width + 1) / (width - 1)], 1).view(-1, 2, 3) 144 | 145 | return theta 146 | 147 | def compare_grid_sample(): 148 | # do gradcheck 149 | N = random.randint(1, 8) 150 | C = 2 # random.randint(1, 8) 151 | H = 5 # random.randint(1, 8) 152 | W = 4 # random.randint(1, 8) 153 | input = Variable(torch.randn(N, C, H, W).cuda(), requires_grad=True) 154 | input_p = input.clone().data.contiguous() 155 | 156 | grid = Variable(torch.randn(N, H, W, 2).cuda(), requires_grad=True) 157 | grid_clone = grid.clone().contiguous() 158 | 159 | out_offcial = F.grid_sample(input, grid) 160 | grad_outputs = Variable(torch.rand(out_offcial.size()).cuda()) 161 | grad_outputs_clone = grad_outputs.clone().contiguous() 162 | grad_inputs = torch.autograd.grad(out_offcial, (input, grid), grad_outputs.contiguous()) 163 | grad_input_off = grad_inputs[0] 164 | 165 | 166 | crf = RoICropFunction() 167 | grid_yx = torch.stack([grid_clone.data[:,:,:,1], grid_clone.data[:,:,:,0]], 3).contiguous().cuda() 168 | out_stn = crf.forward(input_p, grid_yx) 169 | grad_inputs = crf.backward(grad_outputs_clone.data) 170 | grad_input_stn = grad_inputs[0] 171 | pdb.set_trace() 172 | 173 | delta = (grad_input_off.data - grad_input_stn).sum() 174 | -------------------------------------------------------------------------------- /lib/modeling/__init__.py: -------------------------------------------------------------------------------- 
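These helpers implement the spatial-transformer ("crop") flavour of RoI feature extraction: each RoI is converted into a 2x3 affine theta, F.affine_grid turns theta into a normalized sampling grid, and F.grid_sample bilinearly crops the feature map. A minimal sketch of that path (illustrative only; the fixed /16.0 stride and the 7x7 grid are assumptions consistent with the code above):

import torch
import torch.nn.functional as F
from torch.autograd import Variable
from model.utils.net_utils import _affine_grid_gen

feat = Variable(torch.randn(1, 256, 50, 50))                          # stride-16 feature map
rois = Variable(torch.FloatTensor([[0, 32.0, 32.0, 160.0, 160.0]]))   # [batch_idx, x1, y1, x2, y2]

grid = _affine_grid_gen(rois, feat.size()[2:], grid_size=7)           # -> num_rois x 7 x 7 x 2
crops = F.grid_sample(feat, grid)                                     # -> num_rois x 256 x 7 x 7
# (with several RoIs per image the feature map is first expanded along the batch
#  dimension, as _crop_pool_layer does above)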
https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/modeling/__init__.py -------------------------------------------------------------------------------- /lib/modeling/collect_and_distribute_fpn_rpn_proposals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch import nn 3 | 4 | from core.config import cfg 5 | from datasets import json_dataset 6 | import roi_data.fast_rcnn 7 | import utils.blob as blob_utils 8 | import utils.fpn as fpn_utils 9 | 10 | 11 | class CollectAndDistributeFpnRpnProposalsOp(nn.Module): 12 | """Merge RPN proposals generated at multiple FPN levels and then 13 | distribute those proposals to their appropriate FPN levels. An anchor 14 | at one FPN level may predict an RoI that will map to another level, 15 | hence the need to redistribute the proposals. 16 | 17 | This function assumes standard blob names for input and output blobs. 18 | 19 | Input blobs: [rpn_rois_fpn, ..., rpn_rois_fpn, 20 | rpn_roi_probs_fpn, ..., rpn_roi_probs_fpn] 21 | - rpn_rois_fpn are the RPN proposals for FPN level i; see rpn_rois 22 | documentation from GenerateProposals. 23 | - rpn_roi_probs_fpn are the RPN objectness probabilities for FPN 24 | level i; see rpn_roi_probs documentation from GenerateProposals. 25 | 26 | If used during training, then the input blobs will also include: 27 | [roidb, im_info] (see GenerateProposalLabels). 28 | 29 | Output blobs: [rois_fpn, ..., rois_rpn, rois, 30 | rois_idx_restore] 31 | - rois_fpn are the RPN proposals for FPN level i 32 | - rois_idx_restore is a permutation on the concatenation of all 33 | rois_fpn, i=min...max, such that when applied the RPN RoIs are 34 | restored to their original order in the input blobs. 35 | 36 | If used during training, then the output blobs will also include: 37 | [labels, bbox_targets, bbox_inside_weights, bbox_outside_weights]. 38 | """ 39 | def __init__(self): 40 | super().__init__() 41 | 42 | def forward(self, inputs, roidb, im_info): 43 | """ 44 | Args: 45 | inputs: a list of [rpn_rois_fpn2, ..., rpn_rois_fpn6, 46 | rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6] 47 | im_info: [[im_height, im_width, im_scale], ...] 48 | """ 49 | rois = collect(inputs, self.training) 50 | if self.training: 51 | # During training we reuse the data loader code. We populate roidb 52 | # entries on the fly using the rois generated by RPN. 53 | im_scales = im_info.data.numpy()[:, 2] 54 | # For historical consistency with the original Faster R-CNN 55 | # implementation we are *not* filtering crowd proposals. 56 | # This choice should be investigated in the future (it likely does 57 | # not matter). 
58 | json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0) 59 | # Compute training labels for the RPN proposals; also handles 60 | # distributing the proposals over FPN levels 61 | output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names() 62 | blobs = {k: [] for k in output_blob_names} 63 | roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb) 64 | else: 65 | # For inference we have a special code path that avoids some data 66 | # loader overhead 67 | blobs = distribute(rois, None) 68 | 69 | return blobs 70 | 71 | 72 | def collect(inputs, is_training): 73 | cfg_key = 'TRAIN' if is_training else 'TEST' 74 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 75 | k_max = cfg.FPN.RPN_MAX_LEVEL 76 | k_min = cfg.FPN.RPN_MIN_LEVEL 77 | num_lvls = k_max - k_min + 1 78 | roi_inputs = inputs[:num_lvls] 79 | score_inputs = inputs[num_lvls:] 80 | 81 | # rois are in [[batch_idx, x0, y0, x1, y2], ...] format 82 | # Combine predictions across all levels and retain the top scoring 83 | rois = np.concatenate(roi_inputs) 84 | scores = np.concatenate(score_inputs).squeeze() 85 | inds = np.argsort(-scores)[:post_nms_topN] 86 | rois = rois[inds, :] 87 | return rois 88 | 89 | 90 | def distribute(rois, label_blobs): 91 | """To understand the output blob order see return value of 92 | roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=False) 93 | """ 94 | lvl_min = cfg.FPN.ROI_MIN_LEVEL 95 | lvl_max = cfg.FPN.ROI_MAX_LEVEL 96 | lvls = fpn_utils.map_rois_to_fpn_levels(rois[:, 1:5], lvl_min, lvl_max) 97 | 98 | # Delete roi entries that have negative area 99 | idx_neg = np.where(lvls == -1)[0] 100 | rois = np.delete(rois, idx_neg, axis=0) 101 | lvls = np.delete(lvls, idx_neg, axis=0) 102 | 103 | output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=False) 104 | outputs = [None] * len(output_blob_names) 105 | outputs[0] = rois 106 | 107 | # Create new roi blobs for each FPN level 108 | # (See: utils.fpn.add_multilevel_roi_blobs which is similar but annoying 109 | # to generalize to support this particular case.) 
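# Illustrative example (hypothetical values): if lvls = [3, 2, 3, 2] with
# lvl_min = 2, then idx_lvl is [1, 3] for level 2 and [0, 2] for level 3, so
# rois_idx_order becomes [1, 3, 0, 2] and
# rois_idx_restore = argsort([1, 3, 0, 2]) = [2, 0, 3, 1]; indexing the
# concatenated per-level RoIs with this permutation restores their original
# order, which is what the box head downstream expects.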
110 | rois_idx_order = np.empty((0, )) 111 | for output_idx, lvl in enumerate(range(lvl_min, lvl_max + 1)): 112 | idx_lvl = np.where(lvls == lvl)[0] 113 | blob_roi_level = rois[idx_lvl, :] 114 | outputs[output_idx + 1] = blob_roi_level 115 | rois_idx_order = np.concatenate((rois_idx_order, idx_lvl)) 116 | rois_idx_restore = np.argsort(rois_idx_order) 117 | outputs[-1] = rois_idx_restore.astype(np.int32) 118 | 119 | return dict(zip(output_blob_names, outputs)) 120 | -------------------------------------------------------------------------------- /lib/modeling/fast_rcnn_heads.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.nn.init as init 5 | from torch.autograd import Variable 6 | 7 | from core.config import cfg 8 | import utils.net as net_utils 9 | 10 | 11 | class fast_rcnn_outputs(nn.Module): 12 | def __init__(self, dim_in): 13 | super().__init__() 14 | self.cls_score = nn.Linear(dim_in, cfg.MODEL.NUM_CLASSES) 15 | if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG: 16 | self.bbox_pred = nn.Linear(dim_in, 4) 17 | else: 18 | self.bbox_pred = nn.Linear(dim_in, 4 * cfg.MODEL.NUM_CLASSES) 19 | 20 | self._init_weights() 21 | 22 | def _init_weights(self): 23 | init.normal(self.cls_score.weight, std=0.01) 24 | init.constant(self.cls_score.bias, 0) 25 | init.normal(self.bbox_pred.weight, std=0.001) 26 | init.constant(self.bbox_pred.bias, 0) 27 | 28 | def detectron_weight_mapping(self): 29 | detectron_weight_mapping = { 30 | 'cls_score.weight': 'cls_score_w', 31 | 'cls_score.bias': 'cls_score_b', 32 | 'bbox_pred.weight': 'bbox_pred_w', 33 | 'bbox_pred.bias': 'bbox_pred_b' 34 | } 35 | orphan_in_detectron = [] 36 | return detectron_weight_mapping, orphan_in_detectron 37 | 38 | def forward(self, x): 39 | if x.dim() == 4: 40 | x = x.squeeze(3).squeeze(2) 41 | cls_score = self.cls_score(x) 42 | if not self.training: 43 | cls_score = F.softmax(cls_score, dim=1) 44 | bbox_pred = self.bbox_pred(x) 45 | 46 | return cls_score, bbox_pred 47 | 48 | 49 | def fast_rcnn_losses(cls_score, bbox_pred, label_int32, bbox_targets, 50 | bbox_inside_weights, bbox_outside_weights): 51 | device_id = cls_score.get_device() 52 | rois_label = Variable(torch.from_numpy(label_int32.astype('int64'))).cuda(device_id) 53 | loss_cls = F.cross_entropy(cls_score, rois_label) 54 | 55 | bbox_targets = Variable(torch.from_numpy(bbox_targets)).cuda(device_id) 56 | bbox_inside_weights = Variable(torch.from_numpy(bbox_inside_weights)).cuda(device_id) 57 | bbox_outside_weights = Variable(torch.from_numpy(bbox_outside_weights)).cuda(device_id) 58 | loss_bbox = net_utils.smooth_l1_loss( 59 | bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights) 60 | return loss_cls, loss_bbox 61 | 62 | 63 | # ---------------------------------------------------------------------------- # 64 | # Box heads 65 | # ---------------------------------------------------------------------------- # 66 | 67 | class roi_2mlp_head(nn.Module): 68 | """Add a ReLU MLP with two hidden layers.""" 69 | def __init__(self, dim_in, roi_xform_func, spatial_scale): 70 | super().__init__() 71 | self.dim_in = dim_in 72 | self.roi_xform = roi_xform_func 73 | self.spatial_scale = spatial_scale 74 | self.dim_out = hidden_dim = cfg.FAST_RCNN.MLP_HEAD_DIM 75 | 76 | roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION 77 | self.fc1 = nn.Linear(dim_in * roi_size**2, hidden_dim) 78 | self.fc2 = nn.Linear(hidden_dim, hidden_dim) 79 | 80 | self._init_weights() 81 | 82 | 
def _init_weights(self): 83 | init.xavier_uniform(self.fc1.weight) 84 | init.constant(self.fc1.bias, 0) 85 | init.xavier_uniform(self.fc2.weight) 86 | init.constant(self.fc2.bias, 0) 87 | 88 | def detectron_weight_mapping(self): 89 | detectron_weight_mapping = { 90 | 'fc1.weight': 'fc6_w', 91 | 'fc1.bias': 'fc6_b', 92 | 'fc2.weight': 'fc7_w', 93 | 'fc2.bias': 'fc7_b' 94 | } 95 | return detectron_weight_mapping, [] 96 | 97 | def forward(self, x, rpn_ret): 98 | x = self.roi_xform( 99 | x, rpn_ret, 100 | blob_rois='rois', 101 | method=cfg.FAST_RCNN.ROI_XFORM_METHOD, 102 | resolution=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION, 103 | spatial_scale=self.spatial_scale, 104 | sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 105 | ) 106 | batch_size = x.size(0) 107 | x = F.relu(self.fc1(x.view(batch_size, -1)), inplace=True) 108 | x = F.relu(self.fc2(x), inplace=True) 109 | 110 | return x 111 | -------------------------------------------------------------------------------- /lib/modeling/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Faster R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Ross Girshick and Sean Bell 22 | # -------------------------------------------------------- 23 | 24 | import numpy as np 25 | 26 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 27 | # 28 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 29 | # >> anchors 30 | # 31 | # anchors = 32 | # 33 | # -83 -39 100 56 34 | # -175 -87 192 104 35 | # -359 -183 376 200 36 | # -55 -55 72 72 37 | # -119 -119 136 136 38 | # -247 -247 264 264 39 | # -35 -79 52 96 40 | # -79 -167 96 184 41 | # -167 -343 184 360 42 | 43 | # array([[ -83., -39., 100., 56.], 44 | # [-175., -87., 192., 104.], 45 | # [-359., -183., 376., 200.], 46 | # [ -55., -55., 72., 72.], 47 | # [-119., -119., 136., 136.], 48 | # [-247., -247., 264., 264.], 49 | # [ -35., -79., 52., 96.], 50 | # [ -79., -167., 96., 184.], 51 | # [-167., -343., 184., 360.]]) 52 | 53 | 54 | def generate_anchors( 55 | stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2) 56 | ): 57 | """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors 58 | are centered on stride / 2, have (approximate) sqrt areas of the specified 59 | sizes, and aspect ratios as given. 
60 | """ 61 | return _generate_anchors( 62 | stride, 63 | np.array(sizes, dtype=np.float) / stride, 64 | np.array(aspect_ratios, dtype=np.float) 65 | ) 66 | 67 | 68 | def _generate_anchors(base_size, scales, aspect_ratios): 69 | """Generate anchor (reference) windows by enumerating aspect ratios X 70 | scales wrt a reference (0, 0, base_size - 1, base_size - 1) window. 71 | """ 72 | anchor = np.array([1, 1, base_size, base_size], dtype=np.float) - 1 73 | anchors = _ratio_enum(anchor, aspect_ratios) 74 | anchors = np.vstack( 75 | [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])] 76 | ) 77 | return anchors 78 | 79 | 80 | def _whctrs(anchor): 81 | """Return width, height, x center, and y center for an anchor (window).""" 82 | w = anchor[2] - anchor[0] + 1 83 | h = anchor[3] - anchor[1] + 1 84 | x_ctr = anchor[0] + 0.5 * (w - 1) 85 | y_ctr = anchor[1] + 0.5 * (h - 1) 86 | return w, h, x_ctr, y_ctr 87 | 88 | 89 | def _mkanchors(ws, hs, x_ctr, y_ctr): 90 | """Given a vector of widths (ws) and heights (hs) around a center 91 | (x_ctr, y_ctr), output a set of anchors (windows). 92 | """ 93 | ws = ws[:, np.newaxis] 94 | hs = hs[:, np.newaxis] 95 | anchors = np.hstack( 96 | ( 97 | x_ctr - 0.5 * (ws - 1), 98 | y_ctr - 0.5 * (hs - 1), 99 | x_ctr + 0.5 * (ws - 1), 100 | y_ctr + 0.5 * (hs - 1) 101 | ) 102 | ) 103 | return anchors 104 | 105 | 106 | def _ratio_enum(anchor, ratios): 107 | """Enumerate a set of anchors for each aspect ratio wrt an anchor.""" 108 | w, h, x_ctr, y_ctr = _whctrs(anchor) 109 | size = w * h 110 | size_ratios = size / ratios 111 | ws = np.round(np.sqrt(size_ratios)) 112 | hs = np.round(ws * ratios) 113 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 114 | return anchors 115 | 116 | 117 | def _scale_enum(anchor, scales): 118 | """Enumerate a set of anchors for each scale wrt an anchor.""" 119 | w, h, x_ctr, y_ctr = _whctrs(anchor) 120 | ws = w * scales 121 | hs = h * scales 122 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 123 | return anchors 124 | -------------------------------------------------------------------------------- /lib/modeling/generate_proposal_labels.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from core.config import cfg 4 | from datasets import json_dataset 5 | import roi_data.fast_rcnn 6 | 7 | 8 | class GenerateProposalLabelsOp(nn.Module): 9 | def __init__(self): 10 | super().__init__() 11 | 12 | def forward(self, rpn_rois, roidb, im_info): 13 | """Op for generating training labels for RPN proposals. This is used 14 | when training RPN jointly with Fast/Mask R-CNN (as in end-to-end 15 | Faster R-CNN training). 16 | 17 | blobs_in: 18 | - 'rpn_rois': 2D tensor of RPN proposals output by GenerateProposals 19 | - 'roidb': roidb entries that will be labeled 20 | - 'im_info': See GenerateProposals doc. 21 | 22 | blobs_out: 23 | - (variable set of blobs): returns whatever blobs are required for 24 | training the model. It does this by querying the data loader for 25 | the list of blobs that are needed. 26 | """ 27 | im_scales = im_info.data.numpy()[:, 2] 28 | 29 | output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names() 30 | # For historical consistency with the original Faster R-CNN 31 | # implementation we are *not* filtering crowd proposals. 32 | # This choice should be investigated in the future (it likely does 33 | # not matter). 
34 | # Note: crowd_thresh=0 will ignore _filter_crowd_proposals 35 | json_dataset.add_proposals(roidb, rpn_rois, im_scales, crowd_thresh=0) 36 | blobs = {k: [] for k in output_blob_names} 37 | roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb) 38 | 39 | return blobs 40 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/modeling/roi_xfrom/__init__.py -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/modeling/roi_xfrom/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/modeling/roi_xfrom/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | # sources = ['src/roi_align.c'] 7 | # headers = ['src/roi_align.h'] 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_align_cuda.c'] 16 | headers += ['src/roi_align_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_align_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_align', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/modeling/roi_xfrom/roi_align/functions/__init__.py 
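This second copy of the RoIAlign extension under modeling/roi_xfrom is CUDA-only (the CPU sources are commented out of build.py above), and the files that follow add a sampling_ratio argument controlling how many bilinear sample points are taken per output bin. A minimal construction sketch (illustrative; the value 2 is an assumed sampling ratio, not one taken from this repository's configs):

from modeling.roi_xfrom.roi_align.modules.roi_align import RoIAlign

# features_gpu / rois_gpu stand in for CUDA tensors shaped as in the earlier RoIAlign example.
roi_align_op = RoIAlign(aligned_height=7, aligned_width=7,
                        spatial_scale=1.0 / 16.0, sampling_ratio=2)
pooled = roi_align_op(features_gpu, rois_gpu)   # -> num_rois x C x 7 x 7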
-------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_align 4 | 5 | 6 | # TODO use save_for_backward instead 7 | class RoIAlignFunction(Function): 8 | def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio): 9 | self.aligned_width = int(aligned_width) 10 | self.aligned_height = int(aligned_height) 11 | self.spatial_scale = float(spatial_scale) 12 | self.sampling_ratio = int(sampling_ratio) 13 | self.rois = None 14 | self.feature_size = None 15 | 16 | def forward(self, features, rois): 17 | self.rois = rois 18 | self.feature_size = features.size() 19 | 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_() 24 | if features.is_cuda: 25 | roi_align.roi_align_forward_cuda(self.aligned_height, 26 | self.aligned_width, 27 | self.spatial_scale, self.sampling_ratio, features, 28 | rois, output) 29 | else: 30 | raise NotImplementedError 31 | 32 | return output 33 | 34 | def backward(self, grad_output): 35 | assert(self.feature_size is not None and grad_output.is_cuda) 36 | 37 | batch_size, num_channels, data_height, data_width = self.feature_size 38 | 39 | grad_input = self.rois.new(batch_size, num_channels, data_height, 40 | data_width).zero_() 41 | roi_align.roi_align_backward_cuda(self.aligned_height, 42 | self.aligned_width, 43 | self.spatial_scale, self.sampling_ratio, grad_output, 44 | self.rois, grad_input) 45 | 46 | # print grad_input 47 | 48 | return grad_input, None 49 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling my_lib kernels by nvcc..." 
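# Note: -arch=sm_61 on the next line targets Pascal GPUs (e.g. GTX 10xx); if you
# build on different hardware, adjust it accordingly (for instance sm_52 for
# Maxwell or sm_70 for Volta). This is general CUDA build guidance, not a
# constraint imposed by this script.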
7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/modeling/roi_xfrom/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | self.sampling_ratio = int(sampling_ratio) 14 | 15 | def forward(self, features, rois): 16 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 17 | self.spatial_scale, self.sampling_ratio)(features, rois) 18 | 19 | class RoIAlignAvg(Module): 20 | def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio): 21 | super(RoIAlignAvg, self).__init__() 22 | 23 | self.aligned_width = int(aligned_width) 24 | self.aligned_height = int(aligned_height) 25 | self.spatial_scale = float(spatial_scale) 26 | self.sampling_ratio = int(sampling_ratio) 27 | 28 | def forward(self, features, rois): 29 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 30 | self.spatial_scale, self.sampling_ratio)(features, rois) 31 | return avg_pool2d(x, kernel_size=2, stride=1) 32 | 33 | class RoIAlignMax(Module): 34 | def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio): 35 | super(RoIAlignMax, self).__init__() 36 | 37 | self.aligned_width = int(aligned_width) 38 | self.aligned_height = int(aligned_height) 39 | self.spatial_scale = float(spatial_scale) 40 | self.sampling_ratio = int(sampling_ratio) 41 | 42 | def forward(self, features, rois): 43 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 44 | self.spatial_scale, self.sampling_ratio)(features, rois) 45 | return max_pool2d(x, kernel_size=2, stride=1) 46 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // data height 25 | int data_height = 
THCudaTensor_size(state, features, 2); 26 | // data width 27 | int data_width = THCudaTensor_size(state, features, 3); 28 | // Number of channels 29 | int num_channels = THCudaTensor_size(state, features, 1); 30 | 31 | cudaStream_t stream = THCState_getCurrentStream(state); 32 | 33 | ROIAlignForwardLaucher( 34 | data_flat, spatial_scale, num_rois, data_height, 35 | data_width, num_channels, aligned_height, 36 | aligned_width, sampling_ratio, rois_flat, 37 | output_flat, stream); 38 | 39 | return 1; 40 | } 41 | 42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width, num_channels, aligned_height, 72 | aligned_width, sampling_ratio, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/src/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, const int sampling_ratio, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const int sampling_ratio, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, 
const int aligned_height, const int aligned_width, const int sampling_ratio, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const int sampling_ratio, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /lib/modeling/rpn_heads.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import init 3 | import torch.nn.functional as F 4 | 5 | from core.config import cfg 6 | from modeling.generate_anchors import generate_anchors 7 | from modeling.generate_proposals import GenerateProposalsOp 8 | from modeling.generate_proposal_labels import GenerateProposalLabelsOp 9 | import modeling.FPN as FPN 10 | import utils.net as net_utils 11 | 12 | 13 | # ---------------------------------------------------------------------------- # 14 | # RPN and Faster R-CNN outputs and losses 15 | # ---------------------------------------------------------------------------- # 16 | 17 | def generic_rpn_outputs(dim_in, spatial_scale_in): 18 | """Add RPN outputs (objectness classification and bounding box regression) 19 | to an RPN model. Abstracts away the use of FPN. 20 | """ 21 | if cfg.FPN.FPN_ON: 22 | # Delegate to the FPN module 23 | return FPN.fpn_rpn_outputs(dim_in, spatial_scale_in) 24 | else: 25 | # Not using FPN, add RPN to a single scale 26 | return single_scale_rpn_outputs(dim_in, spatial_scale_in) 27 | 28 | 29 | def generic_rpn_losses(*inputs, **kwargs): 30 | """Add RPN losses. Abstracts away the use of FPN.""" 31 | if cfg.FPN.FPN_ON: 32 | return FPN.fpn_rpn_losses(*inputs, **kwargs) 33 | else: 34 | return single_scale_rpn_losses(*inputs, **kwargs) 35 | 36 | 37 | class single_scale_rpn_outputs(nn.Module): 38 | """Add RPN outputs to a single scale model (i.e., no FPN).""" 39 | def __init__(self, dim_in, spatial_scale): 40 | super().__init__() 41 | self.dim_in = dim_in 42 | self.dim_out = dim_in if cfg.RPN.OUT_DIM_AS_IN_DIM else cfg.RPN.OUT_DIM 43 | anchors = generate_anchors( 44 | stride=1. 
/ spatial_scale, 45 | sizes=cfg.RPN.SIZES, 46 | aspect_ratios=cfg.RPN.ASPECT_RATIOS) 47 | num_anchors = anchors.shape[0] 48 | 49 | # RPN hidden representation 50 | self.RPN_conv = nn.Conv2d(self.dim_in, self.dim_out, 3, 1, 1) 51 | # Proposal classification scores 52 | self.n_score_out = num_anchors * 2 if cfg.RPN.CLS_ACTIVATION == 'softmax' \ 53 | else num_anchors 54 | self.RPN_cls_score = nn.Conv2d(self.dim_out, self.n_score_out, 1, 1, 0) 55 | # Proposal bbox regression deltas 56 | self.RPN_bbox_pred = nn.Conv2d(self.dim_out, num_anchors * 4, 1, 1, 0) 57 | 58 | self.RPN_GenerateProposals = GenerateProposalsOp(anchors, spatial_scale) 59 | self.RPN_GenerateProposalLabels = GenerateProposalLabelsOp() 60 | 61 | self._init_weights() 62 | 63 | def _init_weights(self): 64 | init.normal(self.RPN_conv.weight, std=0.01) 65 | init.constant(self.RPN_conv.bias, 0) 66 | init.normal(self.RPN_cls_score.weight, std=0.01) 67 | init.constant(self.RPN_cls_score.bias, 0) 68 | init.normal(self.RPN_bbox_pred.weight, std=0.01) 69 | init.constant(self.RPN_bbox_pred.bias, 0) 70 | 71 | def detectron_weight_mapping(self): 72 | detectron_weight_mapping = { 73 | 'RPN_conv.weight': 'conv_rpn_w', 74 | 'RPN_conv.bias': 'conv_rpn_b', 75 | 'RPN_cls_score.weight': 'rpn_cls_logits_w', 76 | 'RPN_cls_score.bias': 'rpn_cls_logits_b', 77 | 'RPN_bbox_pred.weight': 'rpn_bbox_pred_w', 78 | 'RPN_bbox_pred.bias': 'rpn_bbox_pred_b' 79 | } 80 | orphan_in_detectron = [] 81 | return detectron_weight_mapping, orphan_in_detectron 82 | 83 | def forward(self, x, im_info, roidb=None): 84 | """ 85 | x: feature maps from the backbone network. (Variable) 86 | im_info: (CPU Variable) 87 | roidb: (list of ndarray) 88 | """ 89 | rpn_conv = F.relu(self.RPN_conv(x), inplace=True) 90 | 91 | rpn_cls_logits = self.RPN_cls_score(rpn_conv) 92 | 93 | rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv) 94 | 95 | return_dict = { 96 | 'rpn_cls_logits': rpn_cls_logits, 'rpn_bbox_pred': rpn_bbox_pred} 97 | 98 | if not self.training or cfg.MODEL.FASTER_RCNN: 99 | # Proposals are needed during: 100 | # 1) inference (== not model.train) for RPN only and Faster R-CNN 101 | # OR 102 | # 2) training for Faster R-CNN 103 | # Otherwise (== training for RPN only), proposals are not needed 104 | if cfg.RPN.CLS_ACTIVATION == 'softmax': 105 | B, C, H, W = rpn_cls_logits.size() 106 | rpn_cls_prob = F.softmax( 107 | rpn_cls_logits.view(B, 2, C // 2, H, W), dim=1).view( 108 | B, C, H, W) 109 | rpn_cls_prob = rpn_cls_prob[:, C // 2:].contiguous() 110 | else: 111 | rpn_cls_prob = F.sigmoid(rpn_cls_logits) 112 | 113 | rpn_rois, rpn_rois_prob = self.RPN_GenerateProposals( 114 | rpn_cls_prob, rpn_bbox_pred, im_info) 115 | 116 | return_dict['rpn_rois'] = rpn_rois 117 | return_dict['rpn_roi_probs'] = rpn_rois_prob 118 | 119 | if cfg.MODEL.FASTER_RCNN: 120 | if self.training: 121 | # Add op that generates training labels for in-network RPN proposals 122 | blobs_out = self.RPN_GenerateProposalLabels(rpn_rois, roidb, im_info) 123 | return_dict.update(blobs_out) 124 | else: 125 | # Alias rois to rpn_rois for inference 126 | return_dict['rois'] = return_dict['rpn_rois'] 127 | 128 | return return_dict 129 | 130 | 131 | def single_scale_rpn_losses( 132 | rpn_cls_logits, rpn_bbox_pred, 133 | rpn_labels_int32_wide, rpn_bbox_targets_wide, 134 | rpn_bbox_inside_weights_wide, rpn_bbox_outside_weights_wide): 135 | """Add losses for a single scale RPN model (i.e., no FPN).""" 136 | h, w = rpn_cls_logits.shape[2:] 137 | rpn_labels_int32 = rpn_labels_int32_wide[:, :, :h, :w] # -1 means ignore 138 | h, 
w = rpn_bbox_pred.shape[2:] 139 | rpn_bbox_targets = rpn_bbox_targets_wide[:, :, :h, :w] 140 | rpn_bbox_inside_weights = rpn_bbox_inside_weights_wide[:, :, :h, :w] 141 | rpn_bbox_outside_weights = rpn_bbox_outside_weights_wide[:, :, :h, :w] 142 | 143 | if cfg.RPN.CLS_ACTIVATION == 'softmax': 144 | B, C, H, W = rpn_cls_logits.size() 145 | rpn_cls_logits = rpn_cls_logits.view(B, 2, C / 2, H, W).permute(0, 2, 3, 4, 1).view(-1, 2) 146 | rpn_labels_int32 = rpn_labels_int32.view(-1).long() 147 | loss_rpn_cls = F.cross_entropy(rpn_cls_logits, rpn_labels_int32, ignore_index=-1, size_average=False) 148 | loss_rpn_cls /= (rpn_labels_int32 >= 0).sum().float() 149 | else: 150 | weight = (rpn_labels_int32 >= 0).float() 151 | loss_rpn_cls = F.binary_cross_entropy_with_logits( 152 | rpn_cls_logits, rpn_labels_int32.float(), weight, size_average=False) 153 | loss_rpn_cls /= weight.sum() 154 | 155 | loss_rpn_bbox = net_utils.smooth_l1_loss( 156 | rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights, 157 | beta=1/9) 158 | 159 | return loss_rpn_cls, loss_rpn_bbox 160 | -------------------------------------------------------------------------------- /lib/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules import * 2 | from .parallel import DataParallel -------------------------------------------------------------------------------- /lib/nn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .affine import AffineChannel2d 2 | from .upsample import BilinearInterpolation2d 3 | -------------------------------------------------------------------------------- /lib/nn/modules/affine.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class AffineChannel2d(nn.Module): 6 | """ A simple channel-wise affine transformation operation """ 7 | def __init__(self, num_features): 8 | super().__init__() 9 | self.num_features = num_features 10 | self.weight = nn.Parameter(torch.Tensor(num_features)) 11 | self.bias = nn.Parameter(torch.Tensor(num_features)) 12 | self.weight.data.uniform_() 13 | self.bias.data.zero_() 14 | 15 | def forward(self, x): 16 | return x * self.weight.view(1, self.num_features, 1, 1) + \ 17 | self.bias.view(1, self.num_features, 1, 1) 18 | -------------------------------------------------------------------------------- /lib/nn/modules/upsample.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | 8 | 9 | class BilinearInterpolation2d(nn.Module): 10 | """Bilinear interpolation in space of scale. 11 | 12 | Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s:= up_scale 13 | 14 | Adapted from the CVPR'15 FCN code. 
15 | See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py 16 | """ 17 | def __init__(self, in_channels, out_channels, up_scale): 18 | super().__init__() 19 | assert in_channels == out_channels 20 | assert up_scale % 2 == 0, 'Scale should be even' 21 | self.in_channes = in_channels 22 | self.out_channels = out_channels 23 | self.up_scale = int(up_scale) 24 | self.padding = up_scale // 2 25 | 26 | def upsample_filt(size): 27 | factor = (size + 1) // 2 28 | if size % 2 == 1: 29 | center = factor - 1 30 | else: 31 | center = factor - 0.5 32 | og = np.ogrid[:size, :size] 33 | return ((1 - abs(og[0] - center) / factor) * 34 | (1 - abs(og[1] - center) / factor)) 35 | 36 | kernel_size = up_scale * 2 37 | bil_filt = upsample_filt(kernel_size) 38 | 39 | kernel = np.zeros( 40 | (in_channels, out_channels, kernel_size, kernel_size), dtype=np.float32 41 | ) 42 | kernel[range(in_channels), range(out_channels), :, :] = bil_filt 43 | 44 | self.upconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, 45 | stride=self.up_scale, padding=self.padding) 46 | 47 | self.upconv.weight.data.copy_(torch.from_numpy(kernel)) 48 | self.upconv.bias.data.fill_(0) 49 | self.upconv.weight.requires_grad = False 50 | self.upconv.bias.requires_grad = False 51 | 52 | def forward(self, x): 53 | return self.upconv(x) 54 | -------------------------------------------------------------------------------- /lib/nn/parallel/__init__.py: -------------------------------------------------------------------------------- 1 | from .parallel_apply import parallel_apply 2 | from .replicate import replicate 3 | from .data_parallel import DataParallel, data_parallel 4 | from .scatter_gather import scatter, gather 5 | 6 | __all__ = ['replicate', 'scatter', 'parallel_apply', 'gather', 'data_parallel', 7 | 'DataParallel'] 8 | -------------------------------------------------------------------------------- /lib/nn/parallel/_functions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.cuda.comm as comm 3 | from torch.autograd import Function 4 | 5 | 6 | class Broadcast(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, target_gpus, *inputs): 10 | if not all(input.is_cuda for input in inputs): 11 | raise TypeError('Broadcast function not implemented for CPU tensors') 12 | ctx.target_gpus = target_gpus 13 | if len(inputs) == 0: 14 | return tuple() 15 | ctx.num_inputs = len(inputs) 16 | ctx.input_device = inputs[0].get_device() 17 | outputs = comm.broadcast_coalesced(inputs, ctx.target_gpus) 18 | non_differentiables = [] 19 | for idx, input_requires_grad in enumerate(ctx.needs_input_grad[1:]): 20 | if not input_requires_grad: 21 | for output in outputs: 22 | non_differentiables.append(output[idx]) 23 | ctx.mark_non_differentiable(*non_differentiables) 24 | return tuple([t for tensors in outputs for t in tensors]) 25 | 26 | @staticmethod 27 | def backward(ctx, *grad_outputs): 28 | return (None,) + ReduceAddCoalesced.apply(ctx.input_device, ctx.num_inputs, *grad_outputs) 29 | 30 | 31 | class ReduceAddCoalesced(Function): 32 | 33 | @staticmethod 34 | def forward(ctx, destination, num_inputs, *grads): 35 | ctx.target_gpus = [grads[i].get_device() for i in range(0, len(grads), num_inputs)] 36 | 37 | grads = [grads[i:i + num_inputs] 38 | for i in range(0, len(grads), num_inputs)] 39 | return comm.reduce_add_coalesced(grads, destination) 40 | 41 | @staticmethod 42 | def backward(ctx, *grad_outputs): 43 | return (None, None,) + 
Broadcast.apply(ctx.target_gpus, *grad_outputs) 44 | 45 | 46 | class Gather(Function): 47 | 48 | @staticmethod 49 | def forward(ctx, target_device, dim, *inputs): 50 | assert all(map(lambda i: i.is_cuda, inputs)) 51 | ctx.target_device = target_device 52 | ctx.dim = dim 53 | ctx.input_gpus = tuple(map(lambda i: i.get_device(), inputs)) 54 | ctx.input_sizes = tuple(map(lambda i: i.size(ctx.dim), inputs)) 55 | return comm.gather(inputs, ctx.dim, ctx.target_device) 56 | 57 | @staticmethod 58 | def backward(ctx, grad_output): 59 | return (None, None) + Scatter.apply(ctx.input_gpus, ctx.input_sizes, ctx.dim, grad_output) 60 | 61 | 62 | class Scatter(Function): 63 | 64 | @staticmethod 65 | def forward(ctx, target_gpus, chunk_sizes, dim, input): 66 | ctx.target_gpus = target_gpus 67 | ctx.chunk_sizes = chunk_sizes 68 | ctx.dim = dim 69 | ctx.input_device = input.get_device() if input.is_cuda else -1 70 | streams = None 71 | if ctx.input_device == -1: 72 | # Perform CPU to GPU copies in a background stream 73 | streams = [_get_stream(device) for device in ctx.target_gpus] 74 | outputs = comm.scatter(input, ctx.target_gpus, ctx.chunk_sizes, ctx.dim, streams) 75 | # Synchronize with the copy stream 76 | if streams is not None: 77 | for i, output in enumerate(outputs): 78 | with torch.cuda.device(ctx.target_gpus[i]): 79 | main_stream = torch.cuda.current_stream() 80 | main_stream.wait_stream(streams[i]) 81 | output.record_stream(main_stream) 82 | return outputs 83 | 84 | @staticmethod 85 | def backward(ctx, *grad_output): 86 | return None, None, None, Gather.apply(ctx.input_device, ctx.dim, *grad_output) 87 | 88 | 89 | # background streams used for copying 90 | _streams = None 91 | 92 | 93 | def _get_stream(device): 94 | """Gets a background stream for copying between CPU and GPU""" 95 | global _streams 96 | if device == -1: 97 | return None 98 | if _streams is None: 99 | _streams = [None] * torch.cuda.device_count() 100 | if _streams[device] is None: 101 | _streams[device] = torch.cuda.Stream(device) 102 | return _streams[device] 103 | -------------------------------------------------------------------------------- /lib/nn/parallel/parallel_apply.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import torch 3 | from torch.autograd import Variable 4 | 5 | 6 | def get_a_var(obj): 7 | if isinstance(obj, Variable): 8 | return obj 9 | 10 | if isinstance(obj, list) or isinstance(obj, tuple): 11 | results = map(get_a_var, obj) 12 | for result in results: 13 | if isinstance(result, Variable): 14 | return result 15 | if isinstance(obj, dict): 16 | results = map(get_a_var, obj.items()) 17 | for result in results: 18 | if isinstance(result, Variable): 19 | return result 20 | return None 21 | 22 | 23 | def parallel_apply(modules, inputs, kwargs_tup=None, devices=None): 24 | assert len(modules) == len(inputs) 25 | if kwargs_tup is not None: 26 | assert len(modules) == len(kwargs_tup) 27 | else: 28 | kwargs_tup = ({},) * len(modules) 29 | if devices is not None: 30 | assert len(modules) == len(devices) 31 | else: 32 | devices = [None] * len(modules) 33 | 34 | lock = threading.Lock() 35 | results = {} 36 | 37 | def _worker(i, module, input, kwargs, results, lock, device=None): 38 | if device is None: 39 | device = get_a_var(input).get_device() 40 | try: 41 | with torch.cuda.device(device): 42 | output = module(*input, **kwargs) 43 | with lock: 44 | results[i] = output 45 | except Exception as e: 46 | with lock: 47 | results[i] = e 48 | 49 | if 
len(modules) > 1: 50 | threads = [threading.Thread(target=_worker, 51 | args=(i, module, input, kwargs, results, lock, device), 52 | ) 53 | for i, (module, input, kwargs, device) in 54 | enumerate(zip(modules, inputs, kwargs_tup, devices))] 55 | 56 | for thread in threads: 57 | thread.start() 58 | for thread in threads: 59 | thread.join() 60 | else: 61 | _worker(0, modules[0], inputs[0], kwargs_tup[0], results, lock, devices[0]) 62 | 63 | outputs = [] 64 | for i in range(len(inputs)): 65 | output = results[i] 66 | if isinstance(output, Exception): 67 | raise output 68 | outputs.append(output) 69 | return outputs 70 | -------------------------------------------------------------------------------- /lib/nn/parallel/replicate.py: -------------------------------------------------------------------------------- 1 | import torch.cuda.comm as comm 2 | 3 | 4 | def replicate(network, devices): 5 | from ._functions import Broadcast 6 | 7 | devices = tuple(devices) 8 | num_replicas = len(devices) 9 | 10 | params = list(network.parameters()) 11 | param_indices = {param: idx for idx, param in enumerate(params)} 12 | param_copies = Broadcast.apply(devices, *params) 13 | if len(params) > 0: 14 | param_copies = [param_copies[i:i + len(params)] 15 | for i in range(0, len(param_copies), len(params))] 16 | 17 | buffers = list(network._all_buffers()) 18 | buffer_indices = {buf: idx for idx, buf in enumerate(buffers)} 19 | buffer_copies = comm.broadcast_coalesced(buffers, devices) 20 | 21 | modules = list(network.modules()) 22 | module_copies = [[] for device in devices] 23 | module_indices = {} 24 | 25 | for i, module in enumerate(modules): 26 | module_indices[module] = i 27 | for j in range(num_replicas): 28 | replica = module.__new__(type(module)) 29 | replica.__dict__ = module.__dict__.copy() 30 | replica._parameters = replica._parameters.copy() 31 | replica._buffers = replica._buffers.copy() 32 | replica._modules = replica._modules.copy() 33 | module_copies[j].append(replica) 34 | 35 | for i, module in enumerate(modules): 36 | for key, child in module._modules.items(): 37 | if child is None: 38 | for j in range(num_replicas): 39 | replica = module_copies[j][i] 40 | replica._modules[key] = None 41 | else: 42 | module_idx = module_indices[child] 43 | for j in range(num_replicas): 44 | replica = module_copies[j][i] 45 | replica._modules[key] = module_copies[j][module_idx] 46 | for key, param in module._parameters.items(): 47 | if param is None: 48 | for j in range(num_replicas): 49 | replica = module_copies[j][i] 50 | replica._parameters[key] = None 51 | else: 52 | param_idx = param_indices[param] 53 | for j in range(num_replicas): 54 | replica = module_copies[j][i] 55 | replica._parameters[key] = param_copies[j][param_idx] 56 | for key, buf in module._buffers.items(): 57 | if buf is None: 58 | for j in range(num_replicas): 59 | replica = module_copies[j][i] 60 | replica._buffers[key] = None 61 | else: 62 | buffer_idx = buffer_indices[buf] 63 | for j in range(num_replicas): 64 | replica = module_copies[j][i] 65 | replica._buffers[key] = buffer_copies[j][buffer_idx] 66 | 67 | return [module_copies[j][0] for j in range(num_replicas)] 68 | -------------------------------------------------------------------------------- /lib/nn/parallel/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import re 3 | import numpy as np 4 | import torch 5 | from torch.autograd import Variable 6 | from ._functions import Scatter, Gather 7 | from torch._six 
import string_classes, int_classes 8 | from torch.utils.data.dataloader import numpy_type_map 9 | 10 | 11 | def scatter(inputs, target_gpus, dim=0): 12 | r""" 13 | Slices variables into approximately equal chunks and 14 | distributes them across given GPUs. Duplicates 15 | references to objects that are not variables. Does not 16 | support Tensors. 17 | """ 18 | def scatter_map(obj): 19 | if isinstance(obj, Variable): 20 | return Scatter.apply(target_gpus, None, dim, obj) 21 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 22 | if isinstance(obj, tuple) and len(obj) > 0: 23 | return list(zip(*map(scatter_map, obj))) 24 | if isinstance(obj, list) and len(obj) > 0: 25 | return list(map(list, zip(*map(scatter_map, obj)))) 26 | if isinstance(obj, dict) and len(obj) > 0: 27 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 28 | return [obj for targets in target_gpus] 29 | 30 | # After scatter_map is called, a scatter_map cell will exist. This cell 31 | # has a reference to the actual function scatter_map, which has references 32 | # to a closure that has a reference to the scatter_map cell (because the 33 | # fn is recursive). To avoid this reference cycle, we set the function to 34 | # None, clearing the cell 35 | try: 36 | return scatter_map(inputs) 37 | finally: 38 | scatter_map = None 39 | 40 | 41 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0): 42 | r"""Scatter with support for kwargs dictionary""" 43 | inputs = scatter(inputs, target_gpus, dim) if inputs else [] 44 | kwargs = scatter(kwargs, target_gpus, dim) if kwargs else [] 45 | if len(inputs) < len(kwargs): 46 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 47 | elif len(kwargs) < len(inputs): 48 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 49 | inputs = tuple(inputs) 50 | kwargs = tuple(kwargs) 51 | return inputs, kwargs 52 | 53 | 54 | def gather(outputs, target_device, dim=0): 55 | r""" 56 | Gathers variables from different GPUs on a specified device 57 | (-1 means the CPU). 58 | """ 59 | error_msg = "outputs must contain tensors, numbers, dicts or lists; found {}" 60 | 61 | def gather_map(outputs): 62 | out = outputs[0] 63 | elem_type = type(out) 64 | if isinstance(out, Variable): 65 | return Gather.apply(target_device, dim, *outputs) 66 | if out is None: 67 | return None 68 | if isinstance(out, collections.Sequence): 69 | return type(out)(map(gather_map, zip(*outputs))) 70 | elif isinstance(out, collections.Mapping): 71 | return {key: gather_map([d[key] for d in outputs]) for key in out} 72 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ 73 | and elem_type.__name__ != 'string_': 74 | elem = out 75 | if elem_type.__name__ == 'ndarray': 76 | # array of string classes and object 77 | if re.search('[SaUO]', elem.dtype.str) is not None: 78 | raise TypeError(error_msg.format(elem.dtype)) 79 | 80 | return Variable(torch.from_numpy(np.concatenate(outputs, dim))) 81 | if elem.shape == (): # scalars 82 | py_type = float if elem.dtype.name.startswith('float') else int 83 | return Variable(numpy_type_map[elem.dtype.name](list(map(py_type, outputs)))) 84 | elif isinstance(out, int_classes): 85 | return Variable(torch.LongTensor(outputs)) 86 | elif isinstance(out, float): 87 | return Variable(torch.DoubleTensor(outputs)) 88 | elif isinstance(out, string_classes): 89 | return outputs 90 | 91 | raise TypeError((error_msg.format(elem_type))) 92 | 93 | # Recursive function calls like this create reference cycles. 
94 | # Setting the function to None clears the refcycle. 95 | try: 96 | return gather_map(outputs) 97 | finally: 98 | gather_map = None 99 | -------------------------------------------------------------------------------- /lib/roi_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/roi_data/__init__.py -------------------------------------------------------------------------------- /lib/roi_data/data_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Common utility functions for RPN and RetinaNet minibtach blobs preparation. 17 | """ 18 | 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | from __future__ import unicode_literals 23 | 24 | from collections import namedtuple 25 | import logging 26 | import numpy as np 27 | import threading 28 | 29 | from core.config import cfg 30 | from modeling.generate_anchors import generate_anchors 31 | import utils.boxes as box_utils 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | # octave and aspect fields are only used on RetinaNet. 
Octave corresponds to the 37 | # scale of the anchor and aspect denotes which aspect ratio is used in the range 38 | # of aspect ratios 39 | FieldOfAnchors = namedtuple( 40 | 'FieldOfAnchors', [ 41 | 'field_of_anchors', 'num_cell_anchors', 'stride', 'field_size', 42 | 'octave', 'aspect' 43 | ] 44 | ) 45 | 46 | # Cache for memoizing _get_field_of_anchors 47 | _threadlocal_foa = threading.local() 48 | 49 | 50 | def get_field_of_anchors( 51 | stride, anchor_sizes, anchor_aspect_ratios, octave=None, aspect=None 52 | ): 53 | global _threadlocal_foa 54 | if not hasattr(_threadlocal_foa, 'cache'): 55 | _threadlocal_foa.cache = {} 56 | 57 | cache_key = str(stride) + str(anchor_sizes) + str(anchor_aspect_ratios) 58 | if cache_key in _threadlocal_foa.cache: 59 | return _threadlocal_foa.cache[cache_key] 60 | 61 | # Anchors at a single feature cell 62 | cell_anchors = generate_anchors( 63 | stride=stride, sizes=anchor_sizes, aspect_ratios=anchor_aspect_ratios 64 | ) 65 | num_cell_anchors = cell_anchors.shape[0] 66 | 67 | # Generate canonical proposals from shifted anchors 68 | # Enumerate all shifted positions on the (H, W) grid 69 | fpn_max_size = cfg.FPN.COARSEST_STRIDE * np.ceil( 70 | cfg.TRAIN.MAX_SIZE / float(cfg.FPN.COARSEST_STRIDE) 71 | ) 72 | field_size = int(np.ceil(fpn_max_size / float(stride))) 73 | shifts = np.arange(0, field_size) * stride 74 | shift_x, shift_y = np.meshgrid(shifts, shifts) 75 | shift_x = shift_x.ravel() 76 | shift_y = shift_y.ravel() 77 | shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose() 78 | 79 | # Broacast anchors over shifts to enumerate all anchors at all positions 80 | # in the (H, W) grid: 81 | # - add A cell anchors of shape (1, A, 4) to 82 | # - K shifts of shape (K, 1, 4) to get 83 | # - all shifted anchors of shape (K, A, 4) 84 | # - reshape to (K*A, 4) shifted anchors 85 | A = num_cell_anchors 86 | K = shifts.shape[0] 87 | field_of_anchors = ( 88 | cell_anchors.reshape((1, A, 4)) + 89 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 90 | ) 91 | field_of_anchors = field_of_anchors.reshape((K * A, 4)) 92 | foa = FieldOfAnchors( 93 | field_of_anchors=field_of_anchors.astype(np.float32), 94 | num_cell_anchors=num_cell_anchors, 95 | stride=stride, 96 | field_size=field_size, 97 | octave=octave, 98 | aspect=aspect 99 | ) 100 | _threadlocal_foa.cache[cache_key] = foa 101 | return foa 102 | 103 | 104 | def unmap(data, count, inds, fill=0): 105 | """Unmap a subset of item (data) back to the original set of items (of 106 | size count)""" 107 | if count == len(inds): 108 | return data 109 | 110 | if len(data.shape) == 1: 111 | ret = np.empty((count, ), dtype=data.dtype) 112 | ret.fill(fill) 113 | ret[inds] = data 114 | else: 115 | ret = np.empty((count, ) + data.shape[1:], dtype=data.dtype) 116 | ret.fill(fill) 117 | ret[inds, :] = data 118 | return ret 119 | 120 | 121 | def compute_targets(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)): 122 | """Compute bounding-box regression targets for an image.""" 123 | return box_utils.bbox_transform_inv(ex_rois, gt_rois, weights).astype( 124 | np.float32, copy=False 125 | ) 126 | -------------------------------------------------------------------------------- /lib/roi_data/keypoint_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | """Construct minibatches for Mask R-CNN training when keypoints are enabled. 16 | Handles the minibatch blobs that are specific to training Mask R-CNN for 17 | keypoint detection. Other blobs that are generic to RPN or Fast/er R-CNN are 18 | handled by their respecitive roi_data modules. 19 | """ 20 | 21 | from __future__ import absolute_import 22 | from __future__ import division 23 | from __future__ import print_function 24 | from __future__ import unicode_literals 25 | 26 | import numpy as np 27 | 28 | from core.config import cfg 29 | import utils.blob as blob_utils 30 | import utils.keypoints as keypoint_utils 31 | 32 | 33 | def add_keypoint_rcnn_blobs(blobs, roidb, fg_rois_per_image, fg_inds, im_scale, 34 | batch_idx): 35 | """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary.""" 36 | # Note: gt_inds must match how they're computed in 37 | # datasets.json_dataset._merge_proposal_boxes_into_roidb 38 | gt_inds = np.where(roidb['gt_classes'] > 0)[0] 39 | max_overlaps = roidb['max_overlaps'] 40 | gt_keypoints = roidb['gt_keypoints'] 41 | 42 | ind_kp = gt_inds[roidb['box_to_gt_ind_map']] 43 | within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes']) 44 | vis_kp = gt_keypoints[ind_kp, 2, :] > 0 45 | is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0 46 | kp_fg_inds = np.where( 47 | np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible))[0] 48 | 49 | kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size) 50 | if kp_fg_inds.size > kp_fg_rois_per_this_image: 51 | kp_fg_inds = np.random.choice( 52 | kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False) 53 | 54 | sampled_fg_rois = roidb['boxes'][kp_fg_inds] 55 | box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds] 56 | 57 | num_keypoints = gt_keypoints.shape[2] 58 | sampled_keypoints = -np.ones( 59 | (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints), 60 | dtype=gt_keypoints.dtype) 61 | for ii in range(len(sampled_fg_rois)): 62 | ind = box_to_gt_ind_map[ii] 63 | if ind >= 0: 64 | sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :] 65 | assert np.sum(sampled_keypoints[ii, 2, :]) > 0 66 | 67 | heats, weights = keypoint_utils.keypoints_to_heatmap_labels( 68 | sampled_keypoints, sampled_fg_rois) 69 | 70 | shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 1) 71 | heats = heats.reshape(shape) 72 | weights = weights.reshape(shape) 73 | 74 | sampled_fg_rois *= im_scale 75 | repeated_batch_idx = batch_idx * blob_utils.ones((sampled_fg_rois.shape[0], 76 | 1)) 77 | sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois)) 78 | 79 | blobs['keypoint_rois'] = sampled_fg_rois 80 | blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False) 81 | blobs['keypoint_weights'] = weights 82 | 83 | 84 | def finalize_keypoint_minibatch(blobs, valid): 85 | """Finalize the minibatch after blobs for all minibatch images have been 86 | collated. 
87 | """ 88 | min_count = cfg.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH 89 | num_visible_keypoints = np.sum(blobs['keypoint_weights']) 90 | valid = (valid and len(blobs['keypoint_weights']) > 0 91 | and num_visible_keypoints > min_count) 92 | # Normalizer to use if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False. 93 | # See modeling.model_builder.add_keypoint_losses 94 | norm = num_visible_keypoints / ( 95 | cfg.TRAIN.IMS_PER_BATCH * cfg.TRAIN.BATCH_SIZE_PER_IM * cfg.TRAIN. 96 | FG_FRACTION * cfg.KRCNN.NUM_KEYPOINTS) 97 | blobs['keypoint_loss_normalizer'] = np.array(norm, dtype=np.float32) 98 | return valid 99 | 100 | 101 | def _within_box(points, boxes): 102 | """Validate which keypoints are contained inside a given box. 103 | 104 | points: Nx2xK 105 | boxes: Nx4 106 | output: NxK 107 | """ 108 | x_within = np.logical_and( 109 | points[:, 0, :] >= np.expand_dims(boxes[:, 0], axis=1), 110 | points[:, 0, :] <= np.expand_dims(boxes[:, 2], axis=1)) 111 | y_within = np.logical_and( 112 | points[:, 1, :] >= np.expand_dims(boxes[:, 1], axis=1), 113 | points[:, 1, :] <= np.expand_dims(boxes[:, 3], axis=1)) 114 | return np.logical_and(x_within, y_within) 115 | -------------------------------------------------------------------------------- /lib/roi_data/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | """Construct minibatches for Mask R-CNN training. Handles the minibatch blobs 16 | that are specific to Mask R-CNN. Other blobs that are generic to RPN or 17 | Fast/er R-CNN are handled by their respecitive roi_data modules. 18 | """ 19 | 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | from __future__ import unicode_literals 24 | 25 | import logging 26 | import numpy as np 27 | 28 | from core.config import cfg 29 | import utils.blob as blob_utils 30 | import utils.boxes as box_utils 31 | import utils.segms as segm_utils 32 | 33 | 34 | def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx): 35 | """Add Mask R-CNN specific blobs to the input blob dictionary.""" 36 | # Prepare the mask targets by associating one gt mask to each training roi 37 | # that has a fg (non-bg) class label. 
38 | M = cfg.MRCNN.RESOLUTION 39 | polys_gt_inds = np.where((roidb['gt_classes'] > 0) & 40 | (roidb['is_crowd'] == 0))[0] 41 | polys_gt = [roidb['segms'][i] for i in polys_gt_inds] 42 | boxes_from_polys = segm_utils.polys_to_boxes(polys_gt) 43 | # boxes_from_polys = [roidb['boxes'][i] for i in polys_gt_inds] 44 | fg_inds = np.where(blobs['labels_int32'] > 0)[0] 45 | roi_has_mask = blobs['labels_int32'].copy() 46 | roi_has_mask[roi_has_mask > 0] = 1 47 | 48 | if fg_inds.shape[0] > 0: 49 | # Class labels for the foreground rois 50 | mask_class_labels = blobs['labels_int32'][fg_inds] 51 | masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True) 52 | 53 | # Find overlap between all foreground rois and the bounding boxes 54 | # enclosing each segmentation 55 | rois_fg = sampled_boxes[fg_inds] 56 | overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( 57 | rois_fg.astype(np.float32, copy=False), 58 | boxes_from_polys.astype(np.float32, copy=False)) 59 | # Map from each fg rois to the index of the mask with highest overlap 60 | # (measured by bbox overlap) 61 | fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) 62 | 63 | # add fg targets 64 | for i in range(rois_fg.shape[0]): 65 | fg_polys_ind = fg_polys_inds[i] 66 | poly_gt = polys_gt[fg_polys_ind] 67 | roi_fg = rois_fg[i] 68 | # Rasterize the portion of the polygon mask within the given fg roi 69 | # to an M x M binary image 70 | mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M) 71 | mask = np.array(mask > 0, dtype=np.int32) # Ensure it's binary 72 | masks[i, :] = np.reshape(mask, M**2) 73 | else: # If there are no fg masks (it does happen) 74 | # The network cannot handle empty blobs, so we must provide a mask 75 | # We simply take the first bg roi, given it an all -1's mask (ignore 76 | # label), and label it with class zero (bg). 77 | bg_inds = np.where(blobs['labels_int32'] == 0)[0] 78 | # rois_fg is actually one background roi, but that's ok because ... 79 | rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)) 80 | # We give it an -1's blob (ignore label) 81 | masks = -blob_utils.ones((1, M**2), int32=True) 82 | # We label it with class = 0 (background) 83 | mask_class_labels = blob_utils.zeros((1, )) 84 | # Mark that the first roi has a mask 85 | roi_has_mask[0] = 1 86 | 87 | if cfg.MRCNN.CLS_SPECIFIC_MASK: 88 | masks = _expand_to_class_specific_mask_targets(masks, 89 | mask_class_labels) 90 | 91 | # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2) 92 | rois_fg *= im_scale 93 | repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1)) 94 | rois_fg = np.hstack((repeated_batch_idx, rois_fg)) 95 | 96 | # Update blobs dict with Mask R-CNN blobs 97 | blobs['mask_rois'] = rois_fg 98 | blobs['roi_has_mask_int32'] = roi_has_mask 99 | blobs['masks_int32'] = masks 100 | 101 | 102 | def _expand_to_class_specific_mask_targets(masks, mask_class_labels): 103 | """Expand masks from shape (#masks, M ** 2) to (#masks, #classes * M ** 2) 104 | to encode class specific mask targets. 
105 | """ 106 | assert masks.shape[0] == mask_class_labels.shape[0] 107 | M = cfg.MRCNN.RESOLUTION 108 | 109 | # Target values of -1 are "don't care" / ignore labels 110 | mask_targets = -blob_utils.ones( 111 | (masks.shape[0], cfg.MODEL.NUM_CLASSES * M**2), int32=True) 112 | 113 | for i in range(masks.shape[0]): 114 | cls = int(mask_class_labels[i]) 115 | start = M**2 * cls 116 | end = start + M**2 117 | # Ignore background instance 118 | # (only happens when there is no fg samples in an image) 119 | if cls > 0: 120 | mask_targets[i, start:end] = masks[i, :] 121 | 122 | return mask_targets 123 | -------------------------------------------------------------------------------- /lib/roi_data/minibatch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | from core.config import cfg 5 | import utils.blob as blob_utils 6 | import roi_data.rpn 7 | 8 | 9 | def get_minibatch_blob_names(is_training=True): 10 | """Return blob names in the order in which they are read by the data loader. 11 | """ 12 | # data blob: holds a batch of N images, each with 3 channels 13 | blob_names = ['data'] 14 | if cfg.RPN.RPN_ON: 15 | # RPN-only or end-to-end Faster R-CNN 16 | blob_names += roi_data.rpn.get_rpn_blob_names(is_training=is_training) 17 | elif cfg.RETINANET.RETINANET_ON: 18 | raise NotImplementedError 19 | else: 20 | # Fast R-CNN like models trained on precomputed proposals 21 | blob_names += roi_data.fast_rcnn.get_fast_rcnn_blob_names( 22 | is_training=is_training 23 | ) 24 | return blob_names 25 | 26 | 27 | def get_minibatch(roidb): 28 | """Given a roidb, construct a minibatch sampled from it.""" 29 | # We collect blobs from each image onto a list and then concat them into a 30 | # single tensor, hence we initialize each blob to an empty list 31 | blobs = {k: [] for k in get_minibatch_blob_names()} 32 | 33 | # Get the input image blob 34 | im_blob, im_scales = _get_image_blob(roidb) 35 | blobs['data'] = im_blob 36 | if cfg.RPN.RPN_ON: 37 | # RPN-only or end-to-end Faster/Mask R-CNN 38 | valid = roi_data.rpn.add_rpn_blobs(blobs, im_scales, roidb) 39 | elif cfg.RETINANET.RETINANET_ON: 40 | raise NotImplementedError 41 | else: 42 | # Fast R-CNN like models trained on precomputed proposals 43 | valid = roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb) 44 | return blobs, valid 45 | 46 | 47 | def _get_image_blob(roidb): 48 | """Builds an input blob from the images in the roidb at the specified 49 | scales. 
50 | """ 51 | num_images = len(roidb) 52 | # Sample random scales to use for each image in this batch 53 | scale_inds = np.random.randint( 54 | 0, high=len(cfg.TRAIN.SCALES), size=num_images) 55 | processed_ims = [] 56 | im_scales = [] 57 | for i in range(num_images): 58 | im = cv2.imread(roidb[i]['image']) 59 | assert im is not None, \ 60 | 'Failed to read image \'{}\''.format(roidb[i]['image']) 61 | # If NOT using opencv to read in images, uncomment following lines 62 | # if len(im.shape) == 2: 63 | # im = im[:, :, np.newaxis] 64 | # im = np.concatenate((im, im, im), axis=2) 65 | # # flip the channel, since the original one using cv2 66 | # # rgb -> bgr 67 | # im = im[:, :, ::-1] 68 | if roidb[i]['flipped']: 69 | im = im[:, ::-1, :] 70 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 71 | im, im_scale = blob_utils.prep_im_for_blob( 72 | im, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE) 73 | im_scales.append(im_scale[0]) 74 | processed_ims.append(im[0]) 75 | 76 | # Create a blob to hold the input images [n, c, h, w] 77 | blob = blob_utils.im_list_to_blob(processed_ims) 78 | 79 | return blob, im_scales 80 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | import os 10 | from os.path import join as pjoin 11 | import numpy as np 12 | from distutils.core import setup 13 | from distutils.extension import Extension 14 | from Cython.Distutils import build_ext 15 | 16 | 17 | def find_in_path(name, path): 18 | "Find a file in a search path" 19 | # adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | # def locate_cuda(): 28 | # """Locate the CUDA environment on the system 29 | # 30 | # Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 31 | # and values giving the absolute path to each directory. 32 | # 33 | # Starts by looking for the CUDAHOME env variable. If not found, everything 34 | # is based on finding 'nvcc' in the PATH. 35 | # """ 36 | # 37 | # # first check if the CUDAHOME env variable is in use 38 | # if 'CUDAHOME' in os.environ: 39 | # home = os.environ['CUDAHOME'] 40 | # nvcc = pjoin(home, 'bin', 'nvcc') 41 | # else: 42 | # # otherwise, search the PATH for NVCC 43 | # default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 44 | # nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 45 | # if nvcc is None: 46 | # raise EnvironmentError('The nvcc binary could not be ' 47 | # 'located in your $PATH. 
Either add it to your path, or set $CUDAHOME') 48 | # home = os.path.dirname(os.path.dirname(nvcc)) 49 | # 50 | # cudaconfig = {'home': home, 'nvcc': nvcc, 51 | # 'include': pjoin(home, 'include'), 52 | # 'lib64': pjoin(home, 'lib64')} 53 | # for k, v in cudaconfig.iteritems(): 54 | # if not os.path.exists(v): 55 | # raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 56 | # 57 | # return cudaconfig 58 | 59 | 60 | # CUDA = locate_cuda() 61 | 62 | # Obtain the numpy include directory. This logic works across numpy versions. 63 | try: 64 | numpy_include = np.get_include() 65 | except AttributeError: 66 | numpy_include = np.get_numpy_include() 67 | 68 | 69 | def customize_compiler_for_nvcc(self): 70 | """inject deep into distutils to customize how the dispatch 71 | to gcc/nvcc works. 72 | 73 | If you subclass UnixCCompiler, it's not trivial to get your subclass 74 | injected in, and still have the right customizations (i.e. 75 | distutils.sysconfig.customize_compiler) run on it. So instead of going 76 | the OO route, I have this. Note, it's kindof like a wierd functional 77 | subclassing going on.""" 78 | 79 | # tell the compiler it can processes .cu 80 | self.src_extensions.append('.cu') 81 | 82 | # save references to the default compiler_so and _comple methods 83 | default_compiler_so = self.compiler_so 84 | super = self._compile 85 | 86 | # now redefine the _compile method. This gets executed for each 87 | # object but distutils doesn't have the ability to change compilers 88 | # based on source extension: we add it. 89 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 90 | print(extra_postargs) 91 | if os.path.splitext(src)[1] == '.cu': 92 | # use the cuda for .cu files 93 | self.set_executable('compiler_so', CUDA['nvcc']) 94 | # use only a subset of the extra_postargs, which are 1-1 translated 95 | # from the extra_compile_args in the Extension class 96 | postargs = extra_postargs['nvcc'] 97 | else: 98 | postargs = extra_postargs['gcc'] 99 | 100 | super(obj, src, ext, cc_args, postargs, pp_opts) 101 | # reset the default compiler_so, which we might have changed for cuda 102 | self.compiler_so = default_compiler_so 103 | 104 | # inject our redefined _compile method into the class 105 | self._compile = _compile 106 | 107 | 108 | # run the customize_compiler 109 | class custom_build_ext(build_ext): 110 | def build_extensions(self): 111 | customize_compiler_for_nvcc(self.compiler) 112 | build_ext.build_extensions(self) 113 | 114 | 115 | ext_modules = [ 116 | Extension( 117 | name='utils.cython_bbox', 118 | sources=['utils/cython_bbox.pyx'], 119 | extra_compile_args={'gcc': ['-Wno-cpp']}, 120 | include_dirs=[numpy_include] 121 | ), 122 | Extension( 123 | name='utils.cython_nms', 124 | sources=['utils/cython_nms.pyx'], 125 | extra_compile_args={'gcc': ['-Wno-cpp']}, 126 | include_dirs=[numpy_include] 127 | ) 128 | ] 129 | 130 | setup( 131 | name='mask_rcnn', 132 | ext_modules=ext_modules, 133 | # inject our custom trigger 134 | cmdclass={'build_ext': custom_build_ext}, 135 | ) 136 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/blob.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Ross Girshick 22 | # -------------------------------------------------------- 23 | """blob helper functions.""" 24 | 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | from __future__ import unicode_literals 29 | 30 | from six.moves import cPickle as pickle 31 | import numpy as np 32 | import cv2 33 | 34 | from core.config import cfg 35 | 36 | 37 | def get_image_blob(im, target_scale, target_max_size): 38 | """Convert an image into a network input. 39 | 40 | Arguments: 41 | im (ndarray): a color image in BGR order 42 | 43 | Returns: 44 | blob (ndarray): a data blob holding an image pyramid 45 | im_scale (float): image scale (target size) / (original size) 46 | im_info (ndarray) 47 | """ 48 | processed_im, im_scale = prep_im_for_blob( 49 | im, cfg.PIXEL_MEANS, [target_scale], target_max_size 50 | ) 51 | blob = im_list_to_blob(processed_im) 52 | # NOTE: this height and width may be larger than actual scaled input image 53 | # due to the FPN.COARSEST_STRIDE related padding in im_list_to_blob. We are 54 | # maintaining this behavior for now to make existing results exactly 55 | # reproducible (in practice using the true input image height and width 56 | # yields nearly the same results, but they are sometimes slightly different 57 | # because predictions near the edge of the image will be pruned more 58 | # aggressively). 59 | height, width = blob.shape[2], blob.shape[3] 60 | im_info = np.hstack((height, width, im_scale))[np.newaxis, :] 61 | return blob, im_scale, im_info.astype(np.float32) 62 | 63 | 64 | def im_list_to_blob(ims): 65 | """Convert a list of images into a network input. Assumes images were 66 | prepared using prep_im_for_blob or equivalent: i.e. 67 | - BGR channel order 68 | - pixel means subtracted 69 | - resized to the desired input size 70 | - float32 numpy ndarray format 71 | Output is a 4D HCHW tensor of the images concatenated along axis 0 with 72 | shape. 
73 | """ 74 | if not isinstance(ims, list): 75 | ims = [ims] 76 | max_shape = get_max_shape([im.shape[:2] for im in ims]) 77 | 78 | num_images = len(ims) 79 | blob = np.zeros( 80 | (num_images, max_shape[0], max_shape[1], 3), dtype=np.float32) 81 | for i in range(num_images): 82 | im = ims[i] 83 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 84 | # Move channels (axis 3) to axis 1 85 | # Axis order will become: (batch elem, channel, height, width) 86 | channel_swap = (0, 3, 1, 2) 87 | blob = blob.transpose(channel_swap) 88 | return blob 89 | 90 | 91 | def get_max_shape(im_shapes): 92 | """Calculate max spatial size (h, w) for batching given a list of image shapes 93 | """ 94 | max_shape = np.array(im_shapes).max(axis=0) 95 | assert max_shape.size == 2 96 | # Pad the image so they can be divisible by a stride 97 | if cfg.FPN.FPN_ON: 98 | stride = float(cfg.FPN.COARSEST_STRIDE) 99 | max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride) 100 | max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride) 101 | return max_shape 102 | 103 | 104 | def prep_im_for_blob(im, pixel_means, target_sizes, max_size): 105 | """Prepare an image for use as a network input blob. Specially: 106 | - Subtract per-channel pixel mean 107 | - Convert to float32 108 | - Rescale to each of the specified target size (capped at max_size) 109 | Returns a list of transformed images, one for each target size. Also returns 110 | the scale factors that were used to compute each returned image. 111 | """ 112 | im = im.astype(np.float32, copy=False) 113 | im -= pixel_means 114 | im_shape = im.shape 115 | im_size_min = np.min(im_shape[0:2]) 116 | im_size_max = np.max(im_shape[0:2]) 117 | 118 | ims = [] 119 | im_scales = [] 120 | for target_size in target_sizes: 121 | im_scale = get_target_scale(im_size_min, im_size_max, target_size, max_size) 122 | im_resized = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 123 | interpolation=cv2.INTER_LINEAR) 124 | ims.append(im_resized) 125 | im_scales.append(im_scale) 126 | return ims, im_scales 127 | 128 | 129 | def get_im_blob_sizes(im_shape, target_sizes, max_size): 130 | """Calculate im blob size for multiple target_sizes given original im shape 131 | """ 132 | im_size_min = np.min(im_shape) 133 | im_size_max = np.max(im_shape) 134 | im_sizes = [] 135 | for target_size in target_sizes: 136 | im_scale = get_target_scale(im_size_min, im_size_max, target_size, max_size) 137 | im_sizes.append(np.round(im_shape * im_scale)) 138 | return np.array(im_sizes) 139 | 140 | 141 | def get_target_scale(im_size_min, im_size_max, target_size, max_size): 142 | """Calculate target resize scale 143 | """ 144 | im_scale = float(target_size) / float(im_size_min) 145 | # Prevent the biggest axis from being more than max_size 146 | if np.round(im_scale * im_size_max) > max_size: 147 | im_scale = float(max_size) / float(im_size_max) 148 | return im_scale 149 | 150 | 151 | def zeros(shape, int32=False): 152 | """Return a blob of all zeros of the given shape with the correct float or 153 | int data type. 154 | """ 155 | return np.zeros(shape, dtype=np.int32 if int32 else np.float32) 156 | 157 | 158 | def ones(shape, int32=False): 159 | """Return a blob of all ones of the given shape with the correct float or 160 | int data type. 161 | """ 162 | return np.ones(shape, dtype=np.int32 if int32 else np.float32) 163 | 164 | 165 | def serialize(obj): 166 | """Serialize a Python object using pickle and encode it as an array of 167 | float32 values so that it can be feed into the workspace. 
See deserialize(). 168 | """ 169 | return np.fromstring(pickle.dumps(obj), dtype=np.uint8).astype(np.float32) 170 | 171 | 172 | def deserialize(arr): 173 | """Unserialize a Python object from an array of float32 values fetched from 174 | a workspace. See serialize(). 175 | """ 176 | return pickle.loads(arr.astype(np.uint8).tobytes()) 177 | -------------------------------------------------------------------------------- /lib/utils/collections.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """A simple attribute dictionary used for representing configuration options.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | 24 | class AttrDict(dict): 25 | 26 | IMMUTABLE = '__immutable__' 27 | 28 | def __init__(self, *args, **kwargs): 29 | super(AttrDict, self).__init__(*args, **kwargs) 30 | self.__dict__[AttrDict.IMMUTABLE] = False 31 | 32 | def __getattr__(self, name): 33 | if name in self.__dict__: 34 | return self.__dict__[name] 35 | elif name in self: 36 | return self[name] 37 | else: 38 | raise AttributeError(name) 39 | 40 | def __setattr__(self, name, value): 41 | if not self.__dict__[AttrDict.IMMUTABLE]: 42 | if name in self.__dict__: 43 | self.__dict__[name] = value 44 | else: 45 | self[name] = value 46 | else: 47 | raise AttributeError( 48 | 'Attempted to set "{}" to "{}", but AttrDict is immutable'. 49 | format(name, value) 50 | ) 51 | 52 | def immutable(self, is_immutable): 53 | """Set immutability to is_immutable and recursively apply the setting 54 | to all nested AttrDicts. 55 | """ 56 | self.__dict__[AttrDict.IMMUTABLE] = is_immutable 57 | # Recursively set immutable state 58 | for v in self.__dict__.values(): 59 | if isinstance(v, AttrDict): 60 | v.immutable(is_immutable) 61 | for v in self.values(): 62 | if isinstance(v, AttrDict): 63 | v.immutable(is_immutable) 64 | 65 | def is_immutable(self): 66 | return self.__dict__[AttrDict.IMMUTABLE] 67 | -------------------------------------------------------------------------------- /lib/utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """An awesome colormap for really neat visualizations.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | import numpy as np 24 | 25 | 26 | def colormap(rgb=False): 27 | color_list = np.array( 28 | [ 29 | 0.000, 0.447, 0.741, 30 | 0.850, 0.325, 0.098, 31 | 0.929, 0.694, 0.125, 32 | 0.494, 0.184, 0.556, 33 | 0.466, 0.674, 0.188, 34 | 0.301, 0.745, 0.933, 35 | 0.635, 0.078, 0.184, 36 | 0.300, 0.300, 0.300, 37 | 0.600, 0.600, 0.600, 38 | 1.000, 0.000, 0.000, 39 | 1.000, 0.500, 0.000, 40 | 0.749, 0.749, 0.000, 41 | 0.000, 1.000, 0.000, 42 | 0.000, 0.000, 1.000, 43 | 0.667, 0.000, 1.000, 44 | 0.333, 0.333, 0.000, 45 | 0.333, 0.667, 0.000, 46 | 0.333, 1.000, 0.000, 47 | 0.667, 0.333, 0.000, 48 | 0.667, 0.667, 0.000, 49 | 0.667, 1.000, 0.000, 50 | 1.000, 0.333, 0.000, 51 | 1.000, 0.667, 0.000, 52 | 1.000, 1.000, 0.000, 53 | 0.000, 0.333, 0.500, 54 | 0.000, 0.667, 0.500, 55 | 0.000, 1.000, 0.500, 56 | 0.333, 0.000, 0.500, 57 | 0.333, 0.333, 0.500, 58 | 0.333, 0.667, 0.500, 59 | 0.333, 1.000, 0.500, 60 | 0.667, 0.000, 0.500, 61 | 0.667, 0.333, 0.500, 62 | 0.667, 0.667, 0.500, 63 | 0.667, 1.000, 0.500, 64 | 1.000, 0.000, 0.500, 65 | 1.000, 0.333, 0.500, 66 | 1.000, 0.667, 0.500, 67 | 1.000, 1.000, 0.500, 68 | 0.000, 0.333, 1.000, 69 | 0.000, 0.667, 1.000, 70 | 0.000, 1.000, 1.000, 71 | 0.333, 0.000, 1.000, 72 | 0.333, 0.333, 1.000, 73 | 0.333, 0.667, 1.000, 74 | 0.333, 1.000, 1.000, 75 | 0.667, 0.000, 1.000, 76 | 0.667, 0.333, 1.000, 77 | 0.667, 0.667, 1.000, 78 | 0.667, 1.000, 1.000, 79 | 1.000, 0.000, 1.000, 80 | 1.000, 0.333, 1.000, 81 | 1.000, 0.667, 1.000, 82 | 0.167, 0.000, 0.000, 83 | 0.333, 0.000, 0.000, 84 | 0.500, 0.000, 0.000, 85 | 0.667, 0.000, 0.000, 86 | 0.833, 0.000, 0.000, 87 | 1.000, 0.000, 0.000, 88 | 0.000, 0.167, 0.000, 89 | 0.000, 0.333, 0.000, 90 | 0.000, 0.500, 0.000, 91 | 0.000, 0.667, 0.000, 92 | 0.000, 0.833, 0.000, 93 | 0.000, 1.000, 0.000, 94 | 0.000, 0.000, 0.167, 95 | 0.000, 0.000, 0.333, 96 | 0.000, 0.000, 0.500, 97 | 0.000, 0.000, 0.667, 98 | 0.000, 0.000, 0.833, 99 | 0.000, 0.000, 1.000, 100 | 0.000, 0.000, 0.000, 101 | 0.143, 0.143, 0.143, 102 | 0.286, 0.286, 0.286, 103 | 0.429, 0.429, 0.429, 104 | 0.571, 0.571, 0.571, 105 | 0.714, 0.714, 0.714, 106 | 0.857, 0.857, 0.857, 107 | 1.000, 1.000, 1.000 108 | ] 109 | ).astype(np.float32) 110 | color_list = color_list.reshape((-1, 3)) * 255 111 | if not rgb: 112 | color_list = color_list[:, ::-1] 113 | return color_list 114 | -------------------------------------------------------------------------------- /lib/utils/cython_bbox.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_overlaps( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ua = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) + 70 | box_area - iw * ih 71 | ) 72 | overlaps[n, k] = iw * ih / ua 73 | return overlaps 74 | -------------------------------------------------------------------------------- /lib/utils/detectron_weight_helper.py: -------------------------------------------------------------------------------- 1 | """Helper functions for loading pretrained weights from Detectron pickle files 2 | """ 3 | 4 | import pickle 5 | import re 6 | import torch 7 | 8 | 9 | def load_detectron_weight(net, detectron_weight_file): 10 | name_mapping, orphan_in_detectron = net.detectron_weight_mapping 11 | 12 | with open(detectron_weight_file, 'rb') as fp: 13 | src_blobs = pickle.load(fp, encoding='latin1') 14 | if 'blobs' in src_blobs: 15 | src_blobs = src_blobs['blobs'] 16 | 17 | params = net.state_dict() 18 | for p_name, p_tensor in params.items(): 19 | d_name = name_mapping[p_name] 20 | if d_name: # if not None of 0 21 | p_tensor.copy_(torch.Tensor(src_blobs[d_name])) 22 | 23 | 24 | def resnet_weights_name_pattern(): 25 | pattern = re.compile(r"conv1_w|res_conv1_.+|res\d_\d_.+") 26 | return pattern 27 | 28 | 29 | if __name__ == '__main__': 30 | """Testing""" 31 | from pprint import pprint 32 | import sys 33 | sys.path.insert(0, '..') 34 | from modeling.model_builder import Generalized_RCNN 35 | from core.config import cfg, cfg_from_file 36 | 37 | cfg.MODEL.NUM_CLASSES = 81 38 | cfg_from_file('../../cfgs/res50_mask.yml') 39 | net = Generalized_RCNN() 40 | 41 | # pprint(list(net.state_dict().keys()), width=1) 42 | 43 | mapping, orphans = net.detectron_weight_mapping 44 | state_dict = net.state_dict() 45 | 46 | for k in mapping.keys(): 47 | assert k in state_dict, '%s' % k 48 | 49 | rest = set(state_dict.keys()) - set(mapping.keys()) 50 | assert len(rest) == 0 51 
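A minimal sketch of how load_detectron_weight above is typically driven, mirroring its use in tools/infer_simple.py at the end of this dump; the config and weight-file paths are placeholders, not files shipped in this listing.

    from core.config import cfg, cfg_from_file, assert_and_infer_cfg
    from modeling.model_builder import Generalized_RCNN
    from utils.detectron_weight_helper import load_detectron_weight

    cfg.MODEL.NUM_CLASSES = 81                      # COCO: 80 classes + background
    cfg_from_file('configs/your_mask_rcnn_config.yaml')  # placeholder config path
    cfg.RESNETS.IMAGENET_PRETRAINED = False         # full weights are loaded below
    assert_and_infer_cfg()

    net = Generalized_RCNN()
    net.cuda()
    load_detectron_weight(net, '/path/to/detectron_model_final.pkl')  # placeholder
    net.eval()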
| -------------------------------------------------------------------------------- /lib/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Environment helper functions.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | import os 24 | import sys 25 | 26 | # Default value of the CMake install prefix 27 | _CMAKE_INSTALL_PREFIX = '/usr/local' 28 | 29 | 30 | def get_runtime_dir(): 31 | """Retrieve the path to the runtime directory.""" 32 | return sys.path[0] 33 | 34 | 35 | def get_py_bin_ext(): 36 | """Retrieve python binary extension.""" 37 | return '.py' 38 | 39 | 40 | def set_up_matplotlib(): 41 | """Set matplotlib up.""" 42 | import matplotlib 43 | # Use a non-interactive backend 44 | matplotlib.use('Agg') 45 | 46 | 47 | def exit_on_error(): 48 | """Exit from a detectron tool when there's an error.""" 49 | sys.exit(1) 50 | 51 | 52 | def import_nccl_ops(): 53 | """Import NCCL ops.""" 54 | # There is no need to load NCCL ops since the 55 | # NCCL dependency is built into the Caffe2 gpu lib 56 | pass 57 | 58 | 59 | def get_detectron_ops_lib(): 60 | """Retrieve Detectron ops library.""" 61 | # Candidate prefixes for the detectron ops lib path 62 | prefixes = [_CMAKE_INSTALL_PREFIX, sys.prefix, sys.exec_prefix] + sys.path 63 | # Search for detectron ops lib 64 | for prefix in prefixes: 65 | ops_path = os.path.join(prefix, 'lib/libcaffe2_detectron_ops_gpu.so') 66 | if os.path.exists(ops_path): 67 | # TODO(ilijar): Switch to using a logger 68 | print('Found Detectron ops lib: {}'.format(ops_path)) 69 | break 70 | assert os.path.exists(ops_path), \ 71 | ('Detectron ops lib not found; make sure that your Caffe2 ' 72 | 'version includes Detectron module') 73 | return ops_path 74 | 75 | 76 | def get_custom_ops_lib(): 77 | """Retrieve custom ops library.""" 78 | lib_dir, _utils = os.path.split(os.path.dirname(__file__)) 79 | custom_ops_lib = os.path.join( 80 | lib_dir, 'build/libcaffe2_detectron_custom_ops_gpu.so') 81 | assert os.path.exists(custom_ops_lib), \ 82 | 'Custom ops lib not found at \'{}\''.format(custom_ops_lib) 83 | return custom_ops_lib 84 | -------------------------------------------------------------------------------- /lib/utils/fpn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import utils.boxes as box_utils 4 | from core.config import cfg 5 | 6 | 7 | # ---------------------------------------------------------------------------- # 8 | # Helper functions for working with multilevel FPN RoIs 9 | # ---------------------------------------------------------------------------- # 10 | 11 | def 
map_rois_to_fpn_levels(rois, k_min, k_max): 12 | """Determine which FPN level each RoI in a set of RoIs should map to based 13 | on the heuristic in the FPN paper. 14 | """ 15 | # Compute level ids 16 | areas, neg_idx = box_utils.boxes_area(rois) 17 | areas[neg_idx] = 0 18 | s = np.sqrt(areas) 19 | s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224 20 | lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4 21 | 22 | # Eqn.(1) in FPN paper 23 | target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6)) 24 | target_lvls = np.clip(target_lvls, k_min, k_max) 25 | 26 | # Mark to discard negative area roi. See utils.fpn.add_multilevel_roi_blobs 27 | target_lvls[neg_idx] = -1 28 | return target_lvls 29 | 30 | 31 | def add_multilevel_roi_blobs( 32 | blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max 33 | ): 34 | """Add RoI blobs for multiple FPN levels to the blobs dict. 35 | 36 | blobs: a dict mapping from blob name to numpy ndarray 37 | blob_prefix: name prefix to use for the FPN blobs 38 | rois: the source rois as a 2D numpy array of shape (N, 5) where each row is 39 | an roi and the columns encode (batch_idx, x1, y1, x2, y2) 40 | target_lvls: numpy array of shape (N, ) indicating which FPN level each roi 41 | in rois should be assigned to. -1 means correspoind roi should be discarded. 42 | lvl_min: the finest (highest resolution) FPN level (e.g., 2) 43 | lvl_max: the coarest (lowest resolution) FPN level (e.g., 6) 44 | """ 45 | rois_idx_order = np.empty((0, )) 46 | rois_stacked = np.zeros((0, 5), dtype=np.float32) # for assert 47 | # Delete roi entries that have negative area 48 | idx_neg = np.where(target_lvls == -1)[0] 49 | rois = np.delete(rois, idx_neg, axis=0) 50 | blobs[blob_prefix] = rois 51 | target_lvls = np.delete(target_lvls, idx_neg, axis=0) 52 | for lvl in range(lvl_min, lvl_max + 1): 53 | idx_lvl = np.where(target_lvls == lvl)[0] 54 | blobs[blob_prefix + '_fpn' + str(lvl)] = rois[idx_lvl, :] 55 | rois_idx_order = np.concatenate((rois_idx_order, idx_lvl)) 56 | rois_stacked = np.vstack( 57 | [rois_stacked, blobs[blob_prefix + '_fpn' + str(lvl)]] 58 | ) 59 | rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, copy=False) 60 | blobs[blob_prefix + '_idx_restore_int32'] = rois_idx_restore 61 | # Sanity check that restore order is correct 62 | assert (rois_stacked[rois_idx_restore] == rois).all() 63 | -------------------------------------------------------------------------------- /lib/utils/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import socket 3 | from collections import defaultdict, Iterable 4 | from datetime import datetime 5 | from copy import deepcopy 6 | from itertools import chain 7 | 8 | import torch 9 | 10 | 11 | def get_run_name(): 12 | """ A unique name for each run """ 13 | return datetime.now().strftime( 14 | '%b%d-%H-%M-%S') + '_' + socket.gethostname() 15 | 16 | 17 | def get_output_dir(args, run_name): 18 | """ Get root output directory for each run """ 19 | cfg_filename, _ = os.path.splitext(os.path.split(args.cfg_file)[1]) 20 | return os.path.join(args.output_base_dir, cfg_filename, run_name) 21 | 22 | 23 | IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm'] 24 | 25 | 26 | def is_image_file(filename): 27 | """Checks if a file is an image. 
28 | Args: 29 | filename (string): path to a file 30 | Returns: 31 | bool: True if the filename ends with a known image extension 32 | """ 33 | filename_lower = filename.lower() 34 | return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS) 35 | 36 | 37 | def get_imagelist_from_dir(dirpath): 38 | images = [] 39 | for f in os.listdir(dirpath): 40 | if is_image_file(f): 41 | images.append(f) 42 | return images 43 | 44 | 45 | def load_optimizer_state_dict(optimizer, state_dict): 46 | # deepcopy, to be consistent with module API 47 | state_dict = deepcopy(state_dict) 48 | # Validate the state_dict 49 | groups = optimizer.param_groups 50 | saved_groups = state_dict['param_groups'] 51 | 52 | if len(groups) != len(saved_groups): 53 | raise ValueError("loaded state dict has a different number of " 54 | "parameter groups") 55 | param_lens = (len(g['params']) for g in groups) 56 | saved_lens = (len(g['params']) for g in saved_groups) 57 | if any(p_len != s_len for p_len, s_len in zip(param_lens, saved_lens)): 58 | raise ValueError("loaded state dict contains a parameter group " 59 | "that doesn't match the size of optimizer's group") 60 | 61 | # Update the state 62 | id_map = {old_id: p for old_id, p in 63 | zip(chain(*(g['params'] for g in saved_groups)), 64 | chain(*(g['params'] for g in groups)))} 65 | 66 | def cast(param, value): 67 | """Make a deep copy of value, casting all tensors to device of param.""" 68 | if torch.is_tensor(value): 69 | # Floating-point types are a bit special here. They are the only ones 70 | # that are assumed to always match the type of params. 71 | if isinstance(param.data, (torch.FloatTensor, torch.cuda.FloatTensor, 72 | torch.DoubleTensor, torch.cuda.DoubleTensor, 73 | torch.HalfTensor, torch.cuda.HalfTensor)): # param.is_floating_point(): 74 | value = value.type_as(param.data) 75 | value = value.cuda(param.get_device()) if param.is_cuda else value.cpu() 76 | return value 77 | elif isinstance(value, dict): 78 | return {k: cast(param, v) for k, v in value.items()} 79 | elif isinstance(value, Iterable): 80 | return type(value)(cast(param, v) for v in value) 81 | else: 82 | return value 83 | 84 | # Copy state assigned to params (and cast tensors to appropriate types). 85 | # State that is not assigned to params is copied as is (needed for 86 | # backward compatibility). 87 | state = defaultdict(dict) 88 | for k, v in state_dict['state'].items(): 89 | if k in id_map: 90 | param = id_map[k] 91 | state[param] = cast(param, v) 92 | else: 93 | state[k] = v 94 | 95 | # Update parameter groups, setting their 'params' value 96 | def update_group(group, new_group): 97 | new_group['params'] = group['params'] 98 | return new_group 99 | param_groups = [ 100 | update_group(g, ng) for g, ng in zip(groups, saved_groups)] 101 | optimizer.__setstate__({'state': state, 'param_groups': param_groups}) 102 | -------------------------------------------------------------------------------- /lib/utils/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | 9 | from core.config import cfg 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, beta=1.0): 15 | """ 16 | SmoothL1(x) = 0.5 * x^2 / beta if |x| < beta 17 | |x| - 0.5 * beta otherwise. 
18 | 1 / N * sum_i alpha_out[i] * SmoothL1(alpha_in[i] * (y_hat[i] - y[i])). 19 | N is the number of batch elements in the input predictions 20 | """ 21 | box_diff = bbox_pred - bbox_targets 22 | in_box_diff = bbox_inside_weights * box_diff 23 | abs_in_box_diff = torch.abs(in_box_diff) 24 | smoothL1_sign = (abs_in_box_diff < beta).detach().float() 25 | in_loss_box = smoothL1_sign * 0.5 * torch.pow(in_box_diff, 2) / beta + \ 26 | (1 - smoothL1_sign) * (abs_in_box_diff - (0.5 * beta)) 27 | out_loss_box = bbox_outside_weights * in_loss_box 28 | loss_box = out_loss_box 29 | N = loss_box.size(0) # batch size 30 | loss_box = loss_box.view(-1).sum(0) / N 31 | return loss_box 32 | 33 | 34 | def clip_gradient(model, clip_norm): 35 | """Computes a gradient clipping coefficient based on gradient norm.""" 36 | totalnorm = 0 37 | for p in model.parameters(): 38 | if p.requires_grad: 39 | modulenorm = p.grad.data.norm() 40 | totalnorm += modulenorm ** 2 41 | totalnorm = np.sqrt(totalnorm) 42 | 43 | norm = clip_norm / max(totalnorm, clip_norm) 44 | for p in model.parameters(): 45 | if p.requires_grad: 46 | p.grad.mul_(norm) 47 | 48 | 49 | def decay_learning_rate(optimizer, cur_lr, decay_rate): 50 | """Decay learning rate""" 51 | new_lr = cur_lr * decay_rate 52 | # ratio = _get_lr_change_ratio(cur_lr, new_lr) 53 | ratio = 1 / decay_rate 54 | if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD: 55 | logger.info('Changing learning rate %.6f -> %.6f', cur_lr, new_lr) 56 | # Update learning rate, note that different parameter may have different learning rate 57 | for param_group in optimizer.param_groups: 58 | cur_lr = param_group['lr'] 59 | new_lr = decay_rate * param_group['lr'] 60 | param_group['lr'] = new_lr 61 | if cfg.SOLVER.TYPE in ['SGD']: 62 | if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \ 63 | ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD: 64 | _CorrectMomentum(optimizer, param_group['params'], new_lr / cur_lr) 65 | 66 | 67 | def _CorrectMomentum(optimizer, param_keys, correction): 68 | """The MomentumSGDUpdate op implements the update V as 69 | 70 | V := mu * V + lr * grad, 71 | 72 | where mu is the momentum factor, lr is the learning rate, and grad is 73 | the stochastic gradient. Since V is not defined independently of the 74 | learning rate (as it should ideally be), when the learning rate is 75 | changed we should scale the update history V in order to make it 76 | compatible in scale with lr * grad. 
77 | """ 78 | logger.info('Scaling update history by %.6f (new lr / old lr)', correction) 79 | for p_key in param_keys: 80 | optimizer.state[p_key]['momentum_buffer'] *= correction 81 | 82 | 83 | def _get_lr_change_ratio(cur_lr, new_lr): 84 | eps = 1e-10 85 | ratio = np.max( 86 | (new_lr / np.max((cur_lr, eps)), cur_lr / np.max((new_lr, eps))) 87 | ) 88 | return ratio 89 | 90 | 91 | def affine_grid_gen(rois, input_size, grid_size): 92 | 93 | rois = rois.detach() 94 | x1 = rois[:, 1::4] / 16.0 95 | y1 = rois[:, 2::4] / 16.0 96 | x2 = rois[:, 3::4] / 16.0 97 | y2 = rois[:, 4::4] / 16.0 98 | 99 | height = input_size[0] 100 | width = input_size[1] 101 | 102 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 103 | theta = torch.cat([\ 104 | (x2 - x1) / (width - 1), 105 | zero, 106 | (x1 + x2 - width + 1) / (width - 1), 107 | zero, 108 | (y2 - y1) / (height - 1), 109 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 110 | 111 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, grid_size, grid_size))) 112 | 113 | return grid 114 | 115 | 116 | def save_ckpt(output_dir, args, epoch, step, model, optimizer, iters_per_epoch): 117 | """Save checkpoint""" 118 | if args.no_save: 119 | return 120 | ckpt_dir = os.path.join(output_dir, 'ckpt') 121 | if not os.path.exists(ckpt_dir): 122 | os.makedirs(ckpt_dir) 123 | save_name = os.path.join(ckpt_dir, 'model_{}_{}.pth'.format(epoch, step)) 124 | if args.mGPUs: 125 | model = model.module 126 | model_state_dict = model.state_dict() 127 | torch.save({ 128 | 'epoch': epoch, 129 | 'step': step, 130 | 'iters_per_epoch': iters_per_epoch, 131 | 'model': model.state_dict(), 132 | 'optimizer': optimizer.state_dict()}, save_name) 133 | logger.info('save model: %s', save_name) 134 | 135 | 136 | def load_ckpt(model, ckpt): 137 | """Load checkpoint""" 138 | mapping, _ = model.detectron_weight_mapping 139 | state_dict = {} 140 | for name in ckpt: 141 | if mapping[name]: 142 | state_dict[name] = ckpt[name] 143 | model.load_state_dict(state_dict, strict=False) 144 | -------------------------------------------------------------------------------- /lib/utils/resnet_weights_helper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for converting resnet pretrained weights from other formats 3 | """ 4 | import os 5 | 6 | import torch 7 | 8 | import nn as mynn 9 | import utils.detectron_weight_helper as dwh 10 | from core.config import cfg 11 | 12 | 13 | def load_pretrained_imagenet_weights(model): 14 | """Load pretrained weights 15 | Args: 16 | num_layers: 50 for res50 and so on. 
17 | model: the generalized rcnnn module 18 | """ 19 | weights_file = os.path.join(cfg.ROOT_DIR, cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS) 20 | pretrianed_state_dict = convert_state_dict(torch.load(weights_file)) 21 | 22 | # Convert batchnorm weights 23 | for name, mod in model.named_modules(): 24 | if isinstance(mod, mynn.AffineChannel2d): 25 | if cfg.FPN.FPN_ON: 26 | pretrianed_name = name.split('.', 2)[-1] 27 | else: 28 | pretrianed_name = name.split('.', 1)[-1] 29 | bn_mean = pretrianed_state_dict[pretrianed_name + '.running_mean'] 30 | bn_var = pretrianed_state_dict[pretrianed_name + '.running_var'] 31 | scale = pretrianed_state_dict[pretrianed_name + '.weight'] 32 | bias = pretrianed_state_dict[pretrianed_name + '.bias'] 33 | std = torch.sqrt(bn_var + 1e-5) 34 | new_scale = scale / std 35 | new_bias = bias - bn_mean * scale / std 36 | pretrianed_state_dict[pretrianed_name + '.weight'] = new_scale 37 | pretrianed_state_dict[pretrianed_name + '.bias'] = new_bias 38 | 39 | model_state_dict = model.state_dict() 40 | 41 | pattern = dwh.resnet_weights_name_pattern() 42 | 43 | name_mapping, _ = model.detectron_weight_mapping 44 | 45 | for k, v in name_mapping.items(): 46 | if v is not None: 47 | if pattern.match(v): 48 | if cfg.FPN.FPN_ON: 49 | pretrianed_key = k.split('.', 2)[-1] 50 | else: 51 | pretrianed_key = k.split('.', 1)[-1] 52 | model_state_dict[k].copy_(pretrianed_state_dict[pretrianed_key]) 53 | 54 | 55 | def convert_state_dict(src_dict): 56 | """Return the correct mapping of tensor name and value 57 | 58 | Mapping from the names of torchvision model to our resnet conv_body and box_head. 59 | """ 60 | dst_dict = {} 61 | for k, v in src_dict.items(): 62 | toks = k.split('.') 63 | if k.startswith('layer'): 64 | assert len(toks[0]) == 6 65 | res_id = int(toks[0][5]) + 1 66 | name = '.'.join(['res%d' % res_id] + toks[1:]) 67 | dst_dict[name] = v 68 | elif k.startswith('fc'): 69 | continue 70 | else: 71 | name = '.'.join(['res1'] + toks) 72 | dst_dict[name] = v 73 | return dst_dict 74 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import time 7 | 8 | 9 | class Timer(object): 10 | """A simple timer.""" 11 | 12 | def __init__(self): 13 | self.reset() 14 | 15 | def tic(self): 16 | # using time.time instead of time.clock because time time.clock 17 | # does not normalize for multithreading 18 | self.start_time = time.time() 19 | 20 | def toc(self, average=True): 21 | self.diff = time.time() - self.start_time 22 | self.total_time += self.diff 23 | self.calls += 1 24 | self.average_time = self.total_time / self.calls 25 | if average: 26 | return self.average_time 27 | else: 28 | return self.diff 29 | 30 | def reset(self): 31 | self.total_time = 0. 32 | self.calls = 0 33 | self.start_time = 0. 34 | self.diff = 0. 35 | self.average_time = 0. 36 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | """Add {PROJECT_ROOT}/lib. 
to PYTHONPATH 2 | 3 | Usage: 4 | import this module before import any modules under lib/ 5 | e.g 6 | import _init_paths 7 | from core.config import cfg 8 | """ 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | 14 | def add_path(path): 15 | if path not in sys.path: 16 | sys.path.insert(0, path) 17 | 18 | this_dir = osp.abspath(osp.dirname(osp.dirname(__file__))) 19 | 20 | # Add lib to PYTHONPATH 21 | lib_path = osp.join(this_dir, 'lib') 22 | add_path(lib_path) 23 | -------------------------------------------------------------------------------- /tools/download_imagenet_weights.py: -------------------------------------------------------------------------------- 1 | """Script to downlaod ImageNet pretrained weights from Google Drive 2 | 3 | Extra packages required to run the script: 4 | colorama, argparse_color_formatter 5 | """ 6 | 7 | import argparse 8 | import os 9 | import requests 10 | from argparse_color_formatter import ColorHelpFormatter 11 | from colorama import init, Fore 12 | 13 | import _init_paths # pylint: disable=unused-import 14 | from core.config import cfg 15 | 16 | 17 | def parse_args(): 18 | """Parser command line argumnets""" 19 | parser = argparse.ArgumentParser(formatter_class=ColorHelpFormatter) 20 | parser.add_argument('--output_dir', help='Directory to save downloaded weight files', 21 | default=os.path.join(cfg.DATA_DIR, 'pretrained_model')) 22 | parser.add_argument('-t', '--targets', nargs='+', metavar='file_name', 23 | help='Files to download. Allowed values are: ' + 24 | ', '.join(map(lambda s: Fore.YELLOW + s + Fore.RESET, 25 | list(PRETRAINED_WEIGHTS.keys()))), 26 | choices=list(PRETRAINED_WEIGHTS.keys()), 27 | default=list(PRETRAINED_WEIGHTS.keys())) 28 | return parser.parse_args() 29 | 30 | 31 | # ---------------------------------------------------------------------------- # 32 | # Mapping from filename to google drive file_id 33 | # ---------------------------------------------------------------------------- # 34 | PRETRAINED_WEIGHTS = { 35 | 'resnet50_caffe.pth': '1wHSvusQ1CiEMc5Nx5R8adqoHQjIDWXl1', 36 | 'resnet101_caffe.pth': '1x2fTMqLrn63EMW0VuK4GEa2eQKzvJ_7l', 37 | 'resnet152_caffe.pth': '1NSCycOb7pU0KzluH326zmyMFUU55JslF', 38 | 'vgg16_caffe.pth': '19UphT53C0Ua9JAtICnw84PPTa3sZZ_9k', 39 | } 40 | 41 | 42 | # ---------------------------------------------------------------------------- # 43 | # Helper fucntions for download file from google drive 44 | # ---------------------------------------------------------------------------- # 45 | 46 | def download_file_from_google_drive(id, destination): 47 | URL = "https://docs.google.com/uc?export=download" 48 | 49 | session = requests.Session() 50 | 51 | response = session.get(URL, params={'id': id}, stream=True) 52 | token = get_confirm_token(response) 53 | 54 | if token: 55 | params = {'id': id, 'confirm': token} 56 | response = session.get(URL, params=params, stream=True) 57 | 58 | save_response_content(response, destination) 59 | 60 | 61 | def get_confirm_token(response): 62 | for key, value in response.cookies.items(): 63 | if key.startswith('download_warning'): 64 | return value 65 | 66 | return None 67 | 68 | 69 | def save_response_content(response, destination): 70 | CHUNK_SIZE = 32768 71 | 72 | with open(destination, "wb") as f: 73 | for chunk in response.iter_content(CHUNK_SIZE): 74 | if chunk: # filter out keep-alive new chunks 75 | f.write(chunk) 76 | 77 | 78 | def main(): 79 | init() # colorama init. 
Only has effect on Windows 80 | args = parse_args() 81 | for filename in args.targets: 82 | file_id = PRETRAINED_WEIGHTS[filename] 83 | if not os.path.exists(args.output_dir): 84 | os.makedirs(args.output_dir) 85 | destination = os.path.join(args.output_dir, filename) 86 | download_file_from_google_drive(file_id, destination) 87 | print('Download {} to {}'.format(filename, destination)) 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /tools/infer_simple.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import distutils.util 7 | import os 8 | import sys 9 | import pprint 10 | import subprocess 11 | from collections import defaultdict 12 | from six.moves import xrange 13 | 14 | # Use a non-interactive backend 15 | import matplotlib 16 | matplotlib.use('Agg') 17 | 18 | import numpy as np 19 | import cv2 20 | 21 | import torch 22 | import torch.nn as nn 23 | from torch.autograd import Variable 24 | 25 | import _init_paths 26 | import nn as mynn 27 | from core.config import cfg, cfg_from_file, cfg_from_list, assert_and_infer_cfg 28 | from core.test import im_detect_all 29 | from modeling.model_builder import Generalized_RCNN 30 | import datasets.dummy_datasets as datasets 31 | import utils.misc as misc_utils 32 | import utils.vis as vis_utils 33 | from utils.detectron_weight_helper import load_detectron_weight 34 | from utils.timer import Timer 35 | 36 | 37 | def parse_args(): 38 | """Parse in command line arguments""" 39 | parser = argparse.ArgumentParser(description='Demonstrate mask-rcnn results') 40 | parser.add_argument( 41 | '--dataset', required=True, 42 | help='training dataset') 43 | 44 | parser.add_argument( 45 | '--cfg', dest='cfg_file', required=True, 46 | help='optional config file') 47 | parser.add_argument( 48 | '--set', dest='set_cfgs', 49 | help='set config keys, will overwrite config in the cfg_file', 50 | default=[], nargs='+') 51 | 52 | parser.add_argument( 53 | '--no_cuda', dest='cuda', help='whether use CUDA', action='store_false') 54 | 55 | parser.add_argument('--load_ckpt', help='path of checkpoint to load') 56 | parser.add_argument( 57 | '--load_detectron', help='path to the detectron weight pickle file') 58 | 59 | parser.add_argument( 60 | '--image_dir', 61 | help='directory to load images for demo') 62 | parser.add_argument( 63 | '--images', nargs='+', 64 | help='images to infer. 
Must not use with --image_dir') 65 | parser.add_argument( 66 | '--output_dir', 67 | help='directory to save demo results', 68 | default="infer_outputs") 69 | parser.add_argument( 70 | '--merge_pdfs', type=distutils.util.strtobool, default=True) 71 | 72 | args = parser.parse_args() 73 | 74 | return args 75 | 76 | 77 | def main(): 78 | """main function""" 79 | 80 | if not torch.cuda.is_available(): 81 | sys.exit("Need a CUDA device to run the code.") 82 | 83 | args = parse_args() 84 | print('Called with args:') 85 | print(args) 86 | 87 | assert args.image_dir or args.images 88 | assert bool(args.image_dir) ^ bool(args.images) 89 | 90 | if args.dataset.startswith("coco"): 91 | dataset = datasets.get_coco_dataset() 92 | cfg.MODEL.NUM_CLASSES = len(dataset.classes) 93 | elif args.dataset.startswith("keypoints_coco"): 94 | dataset = datasets.get_coco_dataset() 95 | cfg.MODEL.NUM_CLASSES = 2 96 | else: 97 | raise ValueError('Unexpected dataset name: {}'.format(args.dataset)) 98 | 99 | print('load cfg from file: {}'.format(args.cfg_file)) 100 | cfg_from_file(args.cfg_file) 101 | 102 | if args.set_cfgs is not None: 103 | cfg_from_list(args.set_cfgs) 104 | 105 | assert args.load_ckpt or args.load_detectron 106 | cfg.RESNETS.IMAGENET_PRETRAINED = False # Don't need to load imagenet pretrained weights 107 | assert_and_infer_cfg() 108 | 109 | maskRCNN = Generalized_RCNN() 110 | 111 | if args.cuda: 112 | maskRCNN.cuda() 113 | 114 | if args.load_ckpt: 115 | load_name = args.load_ckpt 116 | print("loading checkpoint %s" % (load_name)) 117 | checkpoint = torch.load(load_name) 118 | maskRCNN.load_state_dict(checkpoint['model'], strict=False) 119 | 120 | if args.load_detectron: 121 | print("loading detectron weights %s" % args.load_detectron) 122 | load_detectron_weight(maskRCNN, args.load_detectron) 123 | 124 | maskRCNN = mynn.DataParallel(maskRCNN, cpu_keywords=['im_info', 'roidb'], 125 | minibatch=True) 126 | 127 | maskRCNN.eval() 128 | if args.image_dir: 129 | imglist = misc_utils.get_imagelist_from_dir(args.image_dir) 130 | else: 131 | imglist = args.images 132 | num_images = len(imglist) 133 | if not os.path.exists(args.output_dir): 134 | os.makedirs(args.output_dir) 135 | 136 | for i in xrange(num_images): 137 | print('img', i) 138 | im = cv2.imread(imglist[i]) 139 | 140 | timers = defaultdict(Timer) 141 | 142 | cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN, im, timers=timers) 143 | 144 | im_name, _ = os.path.splitext(os.path.basename(imglist[i])) 145 | vis_utils.vis_one_image( 146 | im[:, :, ::-1], # BGR -> RGB for visualization 147 | im_name, 148 | args.output_dir, 149 | cls_boxes, 150 | cls_segms, 151 | cls_keyps, 152 | dataset=dataset, 153 | box_alpha=0.3, 154 | show_class=True, 155 | thresh=0.7, 156 | kp_thresh=2 157 | ) 158 | 159 | if args.merge_pdfs and num_images > 1: 160 | merge_out_path = '{}/results.pdf'.format(args.output_dir) 161 | if os.path.exists(merge_out_path): 162 | os.remove(merge_out_path) 163 | command = "pdfunite {}/*.pdf {}".format(args.output_dir, 164 | merge_out_path) 165 | subprocess.call(command, shell=True) 166 | 167 | 168 | if __name__ == '__main__': 169 | main() 170 | --------------------------------------------------------------------------------
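For reference, the per-image core of main() in tools/infer_simple.py above, stripped of argument parsing and checkpoint loading. This is a hedged sketch, not part of the repository: it assumes a built, weight-loaded maskRCNN (as constructed in the preceding file), and the image/output paths are placeholders. A typical invocation of the full script passes --dataset coco, a --cfg yaml, one of --load_ckpt / --load_detectron, and either --image_dir or --images.

    import os
    from collections import defaultdict

    import cv2

    import _init_paths  # noqa: F401  (puts lib/ on sys.path)
    import datasets.dummy_datasets as datasets
    import utils.vis as vis_utils
    from core.test import im_detect_all
    from utils.timer import Timer


    def run_one_image(maskRCNN, image_path, output_dir='infer_outputs'):
        """Detect and visualize a single image, as main() above does per image."""
        dataset = datasets.get_coco_dataset()
        im = cv2.imread(image_path)  # BGR, as expected by im_detect_all
        timers = defaultdict(Timer)
        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN, im, timers=timers)
        im_name, _ = os.path.splitext(os.path.basename(image_path))
        vis_utils.vis_one_image(
            im[:, :, ::-1],          # BGR -> RGB for visualization
            im_name, output_dir,
            cls_boxes, cls_segms, cls_keyps,
            dataset=dataset, box_alpha=0.3, show_class=True,
            thresh=0.7, kp_thresh=2)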