├── .gitignore ├── .pylintrc ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── configs ├── e2e_keypoint_rcnn_R-50-FPN.yaml ├── e2e_keypoint_rcnn_R-50-FPN_1x.yaml ├── e2e_keypoint_rcnn_R-50-FPN_s1x.yaml ├── e2e_mask_rcnn_R-101-C4.yml ├── e2e_mask_rcnn_R-101-FPN_2x.yaml └── e2e_mask_rcnn_R-50-C4.yml ├── demo ├── 33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg ├── 33823288584_1d21cf0a26_k-detectron-R50-C4.jpg ├── 33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg ├── 33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg ├── 33823288584_1d21cf0a26_k.jpg ├── convert_pdf2img.sh ├── e2e_mask_rcnn_R-50-C4 │ └── train_from_scratch_epoch1_bs4 │ │ ├── img1.jpg │ │ ├── img2.jpg │ │ ├── img3.jpg │ │ └── img4.jpg ├── img1_keypoints-detectron-R50-FPN.jpg ├── img1_keypoints-pydetectron-R50-FPN.jpg ├── img2_keypoints-detectron-R50-FPN.jpg ├── img2_keypoints-pydetectron-R50-FPN.jpg ├── sample_images │ ├── img1.jpg │ ├── img2.jpg │ ├── img3.jpg │ └── img4.jpg └── sample_images_keypoints │ ├── img1_keypoints.jpg │ └── img2_keypoints.jpg ├── lib ├── __init__.py ├── core │ ├── __init__.py │ ├── config.py │ └── test.py ├── datasets │ ├── __init__.py │ ├── dataset_catalog.py │ ├── dummy_datasets.py │ ├── json_dataset.py │ └── roidb.py ├── make.sh ├── model │ ├── __init__.py │ ├── nms │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── nms │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── make.sh │ │ ├── nms_gpu.py │ │ ├── nms_kernel.cu │ │ ├── nms_wrapper.py │ │ └── src │ │ │ ├── nms_cuda.c │ │ │ ├── nms_cuda.h │ │ │ ├── nms_cuda_kernel.cu │ │ │ └── nms_cuda_kernel.h │ ├── roi_align │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_align │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── make.sh │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.c │ │ │ ├── roi_align_cuda.h │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel.h │ ├── roi_crop │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ ├── crop_resize │ │ │ │ ├── __init__.py │ │ │ │ └── _crop_resize.so │ │ │ └── roi_crop │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ ├── crop_resize.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ ├── make.sh │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ └── src │ │ │ ├── roi_crop.c │ │ │ ├── roi_crop.h │ │ │ ├── roi_crop_cuda.c │ │ │ ├── roi_crop_cuda.h │ │ │ ├── roi_crop_cuda_kernel.cu │ │ │ └── roi_crop_cuda_kernel.h │ ├── roi_pooling │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_pooling │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ └── src │ │ │ ├── roi_pooling.c │ │ │ ├── roi_pooling.h │ │ │ ├── roi_pooling_cuda.c │ │ │ ├── roi_pooling_cuda.h │ │ │ ├── roi_pooling_kernel.cu │ │ │ └── roi_pooling_kernel.h │ └── utils │ │ ├── .gitignore │ │ ├── __init__.py │ │ └── net_utils.py ├── modeling │ ├── FPN.py │ ├── ResNet.py │ ├── __init__.py │ ├── collect_and_distribute_fpn_rpn_proposals.py │ ├── fast_rcnn_heads.py │ ├── generate_anchors.py │ ├── generate_proposal_labels.py │ ├── generate_proposals.py │ ├── keypoint_rcnn_heads.py │ ├── mask_rcnn_heads.py │ ├── model_builder.py │ ├── roi_xfrom │ │ ├── __init__.py │ │ └── roi_align │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_align │ │ │ │ └── __init__.py │ │ 
│ ├── build.py │ │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ │ └── src │ │ │ ├── roi_align_cuda.c │ │ │ ├── roi_align_cuda.h │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel.h │ └── rpn_heads.py ├── nn │ ├── __init__.py │ ├── modules │ │ ├── __init__.py │ │ ├── affine.py │ │ └── upsample.py │ └── parallel │ │ ├── __init__.py │ │ ├── _functions.py │ │ ├── data_parallel.py │ │ ├── parallel_apply.py │ │ ├── replicate.py │ │ └── scatter_gather.py ├── roi_data │ ├── __init__.py │ ├── data_utils.py │ ├── fast_rcnn.py │ ├── keypoint_rcnn.py │ ├── loader.py │ ├── mask_rcnn.py │ ├── minibatch.py │ └── rpn.py ├── setup.py └── utils │ ├── __init__.py │ ├── blob.py │ ├── boxes.py │ ├── collections.py │ ├── colormap.py │ ├── cython_bbox.c │ ├── cython_bbox.pyx │ ├── cython_nms.c │ ├── cython_nms.pyx │ ├── detectron_weight_helper.py │ ├── env.py │ ├── fpn.py │ ├── keypoints.py │ ├── misc.py │ ├── net.py │ ├── resnet_weights_helper.py │ ├── segms.py │ ├── timer.py │ └── vis.py └── tools ├── _init_paths.py ├── download_imagenet_weights.py ├── infer_simple.py └── train_net.py /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | *.pyc 3 | *~ 4 | 5 | *.o 6 | *.so 7 | 8 | .ipynb_checkpoints 9 | notebooks/*.pkl 10 | 11 | /Outputs 12 | 13 | # ------------------------------ 14 | 15 | .vscode/* 16 | !.vscode/settings.json 17 | !.vscode/tasks.json 18 | !.vscode/launch.json 19 | !.vscode/extensions.json 20 | 21 | # General 22 | .DS_Store 23 | .AppleDouble 24 | .LSOverride 25 | 26 | # Icon must end with two \r 27 | Icon 28 | 29 | # Thumbnails 30 | ._* 31 | 32 | # Files that might appear in the root of a volume 33 | .DocumentRevisions-V100 34 | .fseventsd 35 | .Spotlight-V100 36 | .TemporaryItems 37 | .Trashes 38 | .VolumeIcon.icns 39 | .com.apple.timemachine.donotpresent 40 | 41 | # Directories potentially created on remote AFP share 42 | .AppleDB 43 | .AppleDesktop 44 | Network Trash Folder 45 | Temporary Items 46 | .apdisk 47 | 48 | *~ 49 | 50 | # temporary files which can be created if a process still has a handle open of a deleted file 51 | .fuse_hidden* 52 | 53 | # KDE directory preferences 54 | .directory 55 | 56 | # Linux trash folder which might appear on any partition or disk 57 | .Trash-* 58 | 59 | # .nfs files are created when an open file is removed but is still being accessed 60 | .nfs* 61 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | extension-pkg-whitelist=numpy,torch,cv2 3 | init-hook="sys.path.insert(0, './tools'); import _init_paths" 4 | 5 | [MESSAGES CONTROL] 6 | disable=wrong-import-position 7 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintEnabled": true, 3 | "python.linting.flake8Enabled": false, 4 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Roy Tseng 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without 
restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /configs/e2e_keypoint_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 130000 15 | STEPS: [0, 100000, 120000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | OUTPUT_DIR: . 
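# Worked example of the keypoint head sizing above (assumption: restating the file's own comment):
#   HEATMAP_SIZE = ROI_XFORM_RESOLUTION * UP_SCALE * 2 (deconv output) = 14 * 2 * 2 = 56
# This solver schedule (130k iterations, LR steps at 100k/120k) matches the _s1x variant below;
# the _1x variant shortens it to 90k iterations with steps at 60k/80k.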
50 | -------------------------------------------------------------------------------- /configs/e2e_keypoint_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | OUTPUT_DIR: . 50 | -------------------------------------------------------------------------------- /configs/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 130000 15 | STEPS: [0, 100000, 120000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | OUTPUT_DIR: . 
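# Relative to the _1x config above, this s1x schedule keeps every model, FPN, and keypoint-head
# setting identical and only stretches training: MAX_ITER 90000 -> 130000, with the LR decay
# steps moved from [0, 60000, 80000] to [0, 100000, 120000].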
50 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R-101-C4.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet101_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 360000 16 | STEPS: [0, 240000, 320000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | OUTPUT_DIR: . -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R-101-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | OUTPUT_DIR: . 
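# In this FPN config the RPN_PRE_NMS_TOP_N limits apply per FPN level (see the inline comments
# above), whereas the C4 configs in this repo use a single image-level limit
# (RPN_PRE_NMS_TOP_N: 6000 at test time). The "2x" schedule here is 180k iterations with LR
# steps at 120k/160k; the C4 mask configs instead train for 360k iterations at BASE_LR 0.01
# with IMS_PER_BATCH 1.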
45 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R-50-C4.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 360000 16 | STEPS: [0, 240000, 320000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | OUTPUT_DIR: . -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-detectron-R50-C4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/33823288584_1d21cf0a26_k-detectron-R50-C4.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/33823288584_1d21cf0a26_k.jpg -------------------------------------------------------------------------------- /demo/convert_pdf2img.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pdfdir='' 4 | 5 | while getopts 'd:' flag; do 6 | case "$flag" in 7 | d) pdfdir=$OPTARG ;; 8 | esac 9 | done 10 | 11 | for pdf in $(ls ${pdfdir}/img*.pdf); do 12 | fname="${pdf%.*}" 13 | convert -density 300x300 -quality 95 $pdf ${fname}.jpg 14 | done 15 | 
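# Example invocation (the demo/pdfs directory name is only illustrative):
#   bash demo/convert_pdf2img.sh -d demo/pdfs
# This converts every img*.pdf in the given directory into a 300-DPI, quality-95 JPEG next to
# it, using ImageMagick's `convert`.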
-------------------------------------------------------------------------------- /demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img1.jpg -------------------------------------------------------------------------------- /demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img2.jpg -------------------------------------------------------------------------------- /demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img3.jpg -------------------------------------------------------------------------------- /demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img4.jpg -------------------------------------------------------------------------------- /demo/img1_keypoints-detectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/img1_keypoints-detectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img1_keypoints-pydetectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/img1_keypoints-pydetectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img2_keypoints-detectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/img2_keypoints-detectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img2_keypoints-pydetectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/img2_keypoints-pydetectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/sample_images/img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/sample_images/img1.jpg -------------------------------------------------------------------------------- /demo/sample_images/img2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/sample_images/img2.jpg -------------------------------------------------------------------------------- /demo/sample_images/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/sample_images/img3.jpg -------------------------------------------------------------------------------- /demo/sample_images/img4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/sample_images/img4.jpg -------------------------------------------------------------------------------- /demo/sample_images_keypoints/img1_keypoints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/sample_images_keypoints/img1_keypoints.jpg -------------------------------------------------------------------------------- /demo/sample_images_keypoints/img2_keypoints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/demo/sample_images_keypoints/img2_keypoints.jpg -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/__init__.py -------------------------------------------------------------------------------- /lib/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/core/__init__.py -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/datasets/__init__.py -------------------------------------------------------------------------------- /lib/datasets/dummy_datasets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | """Provide stub objects that can act as stand-in "dummy" datasets for simple use 16 | cases, like getting all classes in a dataset. 
This exists so that demos can be 17 | run without requiring users to download/install datasets first. 18 | """ 19 | 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | from __future__ import unicode_literals 24 | 25 | from utils.collections import AttrDict 26 | 27 | 28 | def get_coco_dataset(): 29 | """A dummy COCO dataset that includes only the 'classes' field.""" 30 | ds = AttrDict() 31 | classes = [ 32 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 33 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 34 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 35 | 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 36 | 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 37 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 38 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 39 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 40 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 41 | 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 42 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 43 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 44 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush' 45 | ] 46 | ds.classes = {i: name for i, name in enumerate(classes)} 47 | return ds 48 | -------------------------------------------------------------------------------- /lib/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | python setup.py build_ext --inplace 6 | rm -rf build 7 | 8 | # Choose cuda arch as you need 9 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \ 10 | -gencode arch=compute_35,code=sm_35 \ 11 | -gencode arch=compute_50,code=sm_50 \ 12 | -gencode arch=compute_52,code=sm_52 \ 13 | -gencode arch=compute_60,code=sm_60 \ 14 | -gencode arch=compute_61,code=sm_61 " 15 | # -gencode arch=compute_70,code=sm_70 " 16 | 17 | # compile NMS 18 | cd model/nms/src 19 | echo "Compiling nms kernels by nvcc..." 20 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \ 21 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 22 | 23 | cd ../ 24 | python build.py 25 | 26 | # compile roi_pooling 27 | cd ../../ 28 | cd model/roi_pooling/src 29 | echo "Compiling roi pooling kernels by nvcc..." 30 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \ 31 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 32 | cd ../ 33 | python build.py 34 | 35 | # # compile roi_align 36 | # cd ../../ 37 | # cd model/roi_align/src 38 | # echo "Compiling roi align kernels by nvcc..." 39 | # nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \ 40 | # -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 41 | # cd ../ 42 | # python build.py 43 | 44 | # compile roi_crop 45 | cd ../../ 46 | cd model/roi_crop/src 47 | echo "Compiling roi crop kernels by nvcc..." 48 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \ 49 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 50 | cd ../ 51 | python build.py 52 | 53 | # compile roi_align (based on Caffe2's implementation) 54 | cd ../../ 55 | cd modeling/roi_xfrom/roi_align/src 56 | echo "Compiling roi align kernels by nvcc..." 
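# The roi_align_kernel.cu.o object produced by the nvcc command below is linked into the Python
# FFI extension by the adjacent build.py -- presumably via extra_objects, as in
# lib/model/roi_align/build.py -- so this step must succeed before `python build.py` runs.
# The CUDA_ARCH gencode list defined at the top of this script controls which GPU architectures
# the kernel is built for.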
57 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \ 58 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 59 | cd ../ 60 | python build.py 61 | -------------------------------------------------------------------------------- /lib/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/__init__.py -------------------------------------------------------------------------------- /lib/model/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/model/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/nms/__init__.py -------------------------------------------------------------------------------- /lib/model/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/nms/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/nms/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/nms_cuda.c'] 16 | headers += ['src/nms_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/nms_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | print(extra_objects) 25 | 26 | ffi = create_extension( 27 | '_ext.nms', 28 | headers=headers, 29 | sources=sources, 30 | define_macros=defines, 31 | relative_to=__file__, 32 | with_cuda=with_cuda, 33 | extra_objects=extra_objects 34 | ) 35 | 36 | if __name__ == '__main__': 37 | ffi.build() 38 | -------------------------------------------------------------------------------- /lib/model/nms/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling stnm kernels by nvcc..." 
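# Note: unlike lib/make.sh (which builds for the whole CUDA_ARCH gencode list), this standalone
# script hard-codes -arch=sm_52 on the nvcc line below; adjust that flag if the NMS kernel needs
# to run on a different GPU architecture.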
7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /lib/model/nms/nms_gpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | import numpy as np 4 | from ._ext import nms 5 | import pdb 6 | 7 | def nms_gpu(dets, thresh): 8 | keep = dets.new(dets.size(0), 1).zero_().int() 9 | num_out = dets.new(1).zero_().int() 10 | nms.nms_cuda(keep, dets, num_out, thresh) 11 | keep = keep[:num_out[0]] 12 | return keep 13 | -------------------------------------------------------------------------------- /lib/model/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include 10 | #include 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 
| } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /lib/model/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | import torch 8 | from core.config import cfg 9 | from model.nms.nms_gpu import nms_gpu 10 | 11 | def nms(dets, thresh, force_cpu=False): 12 | """Dispatch to either CPU or GPU NMS implementations.""" 13 | if dets.shape[0] == 0: 14 | return [] 15 | # ---numpy version--- 16 | # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 17 | # ---pytorch version--- 18 | return nms_gpu(dets, thresh) 19 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "nms_cuda_kernel.h" 4 | 5 | // this symbol will be resolved automatically 
from PyTorch libs 6 | extern THCState *state; 7 | 8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 9 | THCudaIntTensor *num_out, float nms_overlap_thresh) { 10 | 11 | nms_cuda_compute(THCudaIntTensor_data(state, keep_out), 12 | THCudaIntTensor_data(state, num_out), 13 | THCudaTensor_data(state, boxes_host), 14 | boxes_host->size[0], 15 | boxes_host->size[1], 16 | nms_overlap_thresh); 17 | 18 | return 1; 19 | } 20 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out, 2 | // THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh); 3 | 4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 5 | THCudaIntTensor *num_out, float nms_overlap_thresh); 6 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "nms_cuda_kernel.h" 13 | 14 | #define CUDA_WARN(XXX) \ 15 | do { if (XXX != cudaSuccess) std::cout << "CUDA Error: " << \ 16 | cudaGetErrorString(XXX) << ", at line " << __LINE__ \ 17 | << std::endl; cudaDeviceSynchronize(); } while (0) 18 | 19 | #define CUDA_CHECK(condition) \ 20 | /* Code block avoids redefinition of cudaError_t error */ \ 21 | do { \ 22 | cudaError_t error = condition; \ 23 | if (error != cudaSuccess) { \ 24 | std::cout << cudaGetErrorString(error) << std::endl; \ 25 | } \ 26 | } while (0) 27 | 28 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 29 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 30 | 31 | __device__ inline float devIoU(float const * const a, float const * const b) { 32 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 33 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 34 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 35 | float interS = width * height; 36 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 37 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 38 | return interS / (Sa + Sb - interS); 39 | } 40 | 41 | __global__ void nms_kernel(int n_boxes, float nms_overlap_thresh, 42 | float *dev_boxes, unsigned long long *dev_mask) { 43 | const int row_start = blockIdx.y; 44 | const int col_start = blockIdx.x; 45 | 46 | // if (row_start > col_start) return; 47 | 48 | const int row_size = 49 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 50 | const int col_size = 51 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 52 | 53 | __shared__ float block_boxes[threadsPerBlock * 5]; 54 | if (threadIdx.x < col_size) { 55 | block_boxes[threadIdx.x * 5 + 0] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 57 | block_boxes[threadIdx.x * 5 + 1] = 58 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 59 | block_boxes[threadIdx.x * 5 + 2] = 60 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 61 | block_boxes[threadIdx.x * 5 + 3] = 62 | dev_boxes[(threadsPerBlock * 
col_start + threadIdx.x) * 5 + 3]; 63 | block_boxes[threadIdx.x * 5 + 4] = 64 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 65 | } 66 | __syncthreads(); 67 | 68 | if (threadIdx.x < row_size) { 69 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 70 | const float *cur_box = dev_boxes + cur_box_idx * 5; 71 | int i = 0; 72 | unsigned long long t = 0; 73 | int start = 0; 74 | if (row_start == col_start) { 75 | start = threadIdx.x + 1; 76 | } 77 | for (i = start; i < col_size; i++) { 78 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 79 | t |= 1ULL << i; 80 | } 81 | } 82 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 83 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 84 | } 85 | } 86 | 87 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 88 | int boxes_dim, float nms_overlap_thresh) { 89 | 90 | float* boxes_dev = NULL; 91 | unsigned long long* mask_dev = NULL; 92 | 93 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 94 | 95 | CUDA_CHECK(cudaMalloc(&boxes_dev, 96 | boxes_num * boxes_dim * sizeof(float))); 97 | CUDA_CHECK(cudaMemcpy(boxes_dev, 98 | boxes_host, 99 | boxes_num * boxes_dim * sizeof(float), 100 | cudaMemcpyHostToDevice)); 101 | 102 | CUDA_CHECK(cudaMalloc(&mask_dev, 103 | boxes_num * col_blocks * sizeof(unsigned long long))); 104 | 105 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 106 | DIVUP(boxes_num, threadsPerBlock)); 107 | dim3 threads(threadsPerBlock); 108 | 109 | // printf("i am at line %d\n", boxes_num); 110 | // printf("i am at line %d\n", boxes_dim); 111 | 112 | nms_kernel<<>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector remv(col_blocks); 124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | // we need to create a memory for keep_out on cpu 127 | // otherwise, the following code cannot run 128 | 129 | int* keep_out_cpu = new int[boxes_num]; 130 | 131 | int num_to_keep = 0; 132 | for (int i = 0; i < boxes_num; i++) { 133 | int nblock = i / threadsPerBlock; 134 | int inblock = i % threadsPerBlock; 135 | 136 | if (!(remv[nblock] & (1ULL << inblock))) { 137 | // orignal: keep_out[num_to_keep++] = i; 138 | keep_out_cpu[num_to_keep++] = i; 139 | unsigned long long *p = &mask_host[0] + i * col_blocks; 140 | for (int j = nblock; j < col_blocks; j++) { 141 | remv[j] |= p[j]; 142 | } 143 | } 144 | } 145 | 146 | // copy keep_out_cpu to keep_out on gpu 147 | CUDA_WARN(cudaMemcpy(keep_out, keep_out_cpu, boxes_num * sizeof(int),cudaMemcpyHostToDevice)); 148 | 149 | // *num_out = num_to_keep; 150 | 151 | // original: *num_out = num_to_keep; 152 | // copy num_to_keep to num_out on gpu 153 | 154 | CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),cudaMemcpyHostToDevice)); 155 | 156 | // release cuda memory 157 | CUDA_CHECK(cudaFree(boxes_dev)); 158 | CUDA_CHECK(cudaFree(mask_dev)); 159 | // release cpu memory 160 | delete []keep_out_cpu; 161 | } 162 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int 
boxes_num, 6 | int boxes_dim, float nms_overlap_thresh); 7 | 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | -------------------------------------------------------------------------------- /lib/model/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/roi_align/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | # sources = ['src/roi_align.c'] 7 | # headers = ['src/roi_align.h'] 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_align_cuda.c'] 16 | headers += ['src/roi_align_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_align_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_align', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/model/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_align 4 | 5 | 6 | # TODO use save_for_backward instead 7 | class RoIAlignFunction(Function): 8 | def __init__(self, aligned_height, aligned_width, spatial_scale): 9 | self.aligned_width = int(aligned_width) 10 | self.aligned_height = int(aligned_height) 11 | self.spatial_scale = float(spatial_scale) 12 | self.rois = None 13 | self.feature_size = 
None 14 | 15 | def forward(self, features, rois): 16 | self.rois = rois 17 | self.feature_size = features.size() 18 | 19 | batch_size, num_channels, data_height, data_width = features.size() 20 | num_rois = rois.size(0) 21 | 22 | output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_() 23 | if features.is_cuda: 24 | roi_align.roi_align_forward_cuda(self.aligned_height, 25 | self.aligned_width, 26 | self.spatial_scale, features, 27 | rois, output) 28 | else: 29 | raise NotImplementedError 30 | 31 | return output 32 | 33 | def backward(self, grad_output): 34 | assert(self.feature_size is not None and grad_output.is_cuda) 35 | 36 | batch_size, num_channels, data_height, data_width = self.feature_size 37 | 38 | grad_input = self.rois.new(batch_size, num_channels, data_height, 39 | data_width).zero_() 40 | roi_align.roi_align_backward_cuda(self.aligned_height, 41 | self.aligned_width, 42 | self.spatial_scale, grad_output, 43 | self.rois, grad_input) 44 | 45 | # print grad_input 46 | 47 | return grad_input, None 48 | -------------------------------------------------------------------------------- /lib/model/roi_align/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling my_lib kernels by nvcc..." 7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /lib/model/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 16 | self.spatial_scale)(features, rois) 17 | 18 | class RoIAlignAvg(Module): 19 | def __init__(self, aligned_height, aligned_width, spatial_scale): 20 | super(RoIAlignAvg, self).__init__() 21 | 22 | self.aligned_width = int(aligned_width) 23 | self.aligned_height = int(aligned_height) 24 | self.spatial_scale = float(spatial_scale) 25 | 26 | def forward(self, features, rois): 27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 28 | self.spatial_scale)(features, rois) 29 | return avg_pool2d(x, kernel_size=2, stride=1) 30 | 31 | class RoIAlignMax(Module): 32 | def __init__(self, aligned_height, aligned_width, spatial_scale): 33 | super(RoIAlignMax, self).__init__() 34 | 35 | self.aligned_width = int(aligned_width) 36 | self.aligned_height = int(aligned_height) 37 | self.spatial_scale = float(spatial_scale) 38 | 39 | def forward(self, features, rois): 40 | x = RoIAlignFunction(self.aligned_height+1, 
self.aligned_width+1, 41 | self.spatial_scale)(features, rois) 42 | return max_pool2d(x, kernel_size=2, stride=1) 43 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // data height 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | // data width 27 | int data_width = THCudaTensor_size(state, features, 3); 28 | // Number of channels 29 | int num_channels = THCudaTensor_size(state, features, 1); 30 | 31 | cudaStream_t stream = THCState_getCurrentStream(state); 32 | 33 | ROIAlignForwardLaucher( 34 | data_flat, spatial_scale, num_rois, data_height, 35 | data_width, num_channels, aligned_height, 36 | aligned_width, rois_flat, 37 | output_flat, stream); 38 | 39 | return 1; 40 | } 41 | 42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width, num_channels, aligned_height, 72 | aligned_width, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_kernel.h: 
-------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /lib/model/roi_crop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_crop/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_crop/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/_ext/crop_resize/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._crop_resize import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | locals[symbol] = _wrap_function(fn, _ffi) 10 | __all__.append(symbol) 11 | 12 | _import_symbols(locals()) 13 | -------------------------------------------------------------------------------- /lib/model/roi_crop/_ext/crop_resize/_crop_resize.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_crop/_ext/crop_resize/_crop_resize.so -------------------------------------------------------------------------------- /lib/model/roi_crop/_ext/roi_crop/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_crop import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | 
locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/roi_crop/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = ['src/roi_crop.c'] 9 | headers = ['src/roi_crop.h'] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_crop_cuda.c'] 16 | headers += ['src/roi_crop_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_crop', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_crop/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/crop_resize.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | from cffi import FFI 6 | ffi = FFI() 7 | 8 | class RoICropFunction(Function): 9 | def forward(self, input1, input2): 10 | self.input1 = input1 11 | self.input2 = input2 12 | self.device_c = ffi.new("int *") 13 | output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]) 14 | #print('decice %d' % torch.cuda.current_device()) 15 | if input1.is_cuda: 16 | self.device = torch.cuda.current_device() 17 | else: 18 | self.device = -1 19 | self.device_c[0] = self.device 20 | if not input1.is_cuda: 21 | roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output) 22 | else: 23 | output = output.cuda(self.device) 24 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 25 | return output 26 | 27 | def backward(self, grad_output): 28 | grad_input1 = torch.zeros(self.input1.size()) 29 | grad_input2 = torch.zeros(self.input2.size()) 30 | #print('backward decice %d' % self.device) 31 | if not grad_output.is_cuda: 32 | roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output) 33 | else: 34 | grad_input1 = grad_input1.cuda(self.device) 35 | grad_input2 = grad_input2.cuda(self.device) 36 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 37 | return grad_input1, grad_input2 38 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/gridgen.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 
| from torch.autograd import Function 4 | import numpy as np 5 | 6 | 7 | class AffineGridGenFunction(Function): 8 | def __init__(self, height, width,lr=1): 9 | super(AffineGridGenFunction, self).__init__() 10 | self.lr = lr 11 | self.height, self.width = height, width 12 | self.grid = np.zeros( [self.height, self.width, 3], dtype=np.float32) 13 | self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats = self.width, axis = 0).T, 0) 14 | self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats = self.height, axis = 0), 0) 15 | # self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height - 1)), 0), repeats = self.width, axis = 0).T, 0) 16 | # self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width - 1)), 0), repeats = self.height, axis = 0), 0) 17 | self.grid[:,:,2] = np.ones([self.height, width]) 18 | self.grid = torch.from_numpy(self.grid.astype(np.float32)) 19 | #print(self.grid) 20 | 21 | def forward(self, input1): 22 | self.input1 = input1 23 | output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 24 | self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 25 | for i in range(input1.size(0)): 26 | self.batchgrid[i] = self.grid.astype(self.batchgrid[i]) 27 | 28 | # if input1.is_cuda: 29 | # self.batchgrid = self.batchgrid.cuda() 30 | # output = output.cuda() 31 | 32 | for i in range(input1.size(0)): 33 | output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2) 34 | 35 | return output 36 | 37 | def backward(self, grad_output): 38 | 39 | grad_input1 = self.input1.new(self.input1.size()).zero_() 40 | 41 | # if grad_output.is_cuda: 42 | # self.batchgrid = self.batchgrid.cuda() 43 | # grad_input1 = grad_input1.cuda() 44 | 45 | grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1,2), self.batchgrid.view(-1, self.height*self.width, 3)) 46 | return grad_input1 47 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/roi_crop.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | import pdb 6 | 7 | class RoICropFunction(Function): 8 | def forward(self, input1, input2): 9 | self.input1 = input1.clone() 10 | self.input2 = input2.clone() 11 | output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_() 12 | assert output.get_device() == input1.get_device(), "output and input1 must on the same device" 13 | assert output.get_device() == input2.get_device(), "output and input2 must on the same device" 14 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 15 | return output 16 | 17 | def backward(self, grad_output): 18 | grad_input1 = self.input1.new(self.input1.size()).zero_() 19 | grad_input2 = self.input2.new(self.input2.size()).zero_() 20 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 21 | return grad_input1, grad_input2 22 | -------------------------------------------------------------------------------- /lib/model/roi_crop/make.sh: -------------------------------------------------------------------------------- 1 
| #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling my_lib kernels by nvcc..." 7 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /lib/model/roi_crop/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_crop/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/modules/roi_crop.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_crop import RoICropFunction 3 | 4 | class _RoICrop(Module): 5 | def __init__(self, layout = 'BHWD'): 6 | super(_RoICrop, self).__init__() 7 | def forward(self, input1, input2): 8 | return RoICropFunction()(input1, input2) 9 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop.h: -------------------------------------------------------------------------------- 1 | int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 2 | 3 | int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 4 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 5 | 6 | 7 | 8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 9 | 10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 11 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 12 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "roi_crop_cuda_kernel.h" 5 | 6 | #define real float 7 | 8 | // this symbol will be resolved automatically from PyTorch libs 9 | extern THCState *state; 10 | 11 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 12 | // we assume BHWD format in inputImages 13 | // we assume BHW(YX) format on grids 14 | 15 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output){ 16 | // THCState *state = getCutorchState(L); 17 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor"); 18 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor"); 19 | // THCudaTensor *output = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor"); 20 | 21 | int success = 0; 22 | success = BilinearSamplerBHWD_updateOutput_cuda_kernel(output->size[1], 23 | output->size[3], 24 | output->size[2], 25 | output->size[0], 26 | THCudaTensor_size(state, inputImages, 1), 27 | THCudaTensor_size(state, inputImages, 2), 28 | THCudaTensor_size(state, inputImages, 3), 29 | THCudaTensor_size(state, inputImages, 0), 30 | THCudaTensor_data(state, inputImages), 31 | THCudaTensor_stride(state, inputImages, 0), 32 | THCudaTensor_stride(state, inputImages, 1), 33 | THCudaTensor_stride(state, inputImages, 2), 34 | THCudaTensor_stride(state, inputImages, 3), 35 | 
THCudaTensor_data(state, grids), 36 | THCudaTensor_stride(state, grids, 0), 37 | THCudaTensor_stride(state, grids, 3), 38 | THCudaTensor_stride(state, grids, 1), 39 | THCudaTensor_stride(state, grids, 2), 40 | THCudaTensor_data(state, output), 41 | THCudaTensor_stride(state, output, 0), 42 | THCudaTensor_stride(state, output, 1), 43 | THCudaTensor_stride(state, output, 2), 44 | THCudaTensor_stride(state, output, 3), 45 | THCState_getCurrentStream(state)); 46 | 47 | //check for errors 48 | if (!success) { 49 | THError("aborting"); 50 | } 51 | return 1; 52 | } 53 | 54 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 55 | THCudaTensor *gradGrids, THCudaTensor *gradOutput) 56 | { 57 | // THCState *state = getCutorchState(L); 58 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor"); 59 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor"); 60 | // THCudaTensor *gradInputImages = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor"); 61 | // THCudaTensor *gradGrids = (THCudaTensor *)luaT_checkudata(L, 5, "torch.CudaTensor"); 62 | // THCudaTensor *gradOutput = (THCudaTensor *)luaT_checkudata(L, 6, "torch.CudaTensor"); 63 | 64 | int success = 0; 65 | success = BilinearSamplerBHWD_updateGradInput_cuda_kernel(gradOutput->size[1], 66 | gradOutput->size[3], 67 | gradOutput->size[2], 68 | gradOutput->size[0], 69 | THCudaTensor_size(state, inputImages, 1), 70 | THCudaTensor_size(state, inputImages, 2), 71 | THCudaTensor_size(state, inputImages, 3), 72 | THCudaTensor_size(state, inputImages, 0), 73 | THCudaTensor_data(state, inputImages), 74 | THCudaTensor_stride(state, inputImages, 0), 75 | THCudaTensor_stride(state, inputImages, 1), 76 | THCudaTensor_stride(state, inputImages, 2), 77 | THCudaTensor_stride(state, inputImages, 3), 78 | THCudaTensor_data(state, grids), 79 | THCudaTensor_stride(state, grids, 0), 80 | THCudaTensor_stride(state, grids, 3), 81 | THCudaTensor_stride(state, grids, 1), 82 | THCudaTensor_stride(state, grids, 2), 83 | THCudaTensor_data(state, gradInputImages), 84 | THCudaTensor_stride(state, gradInputImages, 0), 85 | THCudaTensor_stride(state, gradInputImages, 1), 86 | THCudaTensor_stride(state, gradInputImages, 2), 87 | THCudaTensor_stride(state, gradInputImages, 3), 88 | THCudaTensor_data(state, gradGrids), 89 | THCudaTensor_stride(state, gradGrids, 0), 90 | THCudaTensor_stride(state, gradGrids, 3), 91 | THCudaTensor_stride(state, gradGrids, 1), 92 | THCudaTensor_stride(state, gradGrids, 2), 93 | THCudaTensor_data(state, gradOutput), 94 | THCudaTensor_stride(state, gradOutput, 0), 95 | THCudaTensor_stride(state, gradOutput, 1), 96 | THCudaTensor_stride(state, gradOutput, 2), 97 | THCudaTensor_stride(state, gradOutput, 3), 98 | THCState_getCurrentStream(state)); 99 | 100 | //check for errors 101 | if (!success) { 102 | THError("aborting"); 103 | } 104 | return 1; 105 | } 106 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda.h: -------------------------------------------------------------------------------- 1 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 2 | // we assume BHWD format in inputImages 3 | // we assume BHW(YX) format on grids 4 | 5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output); 6 | 7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, 
THCudaTensor *grids, THCudaTensor *gradInputImages, 8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput); 9 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | 6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc, 7 | /*output->size[2]*/int ow, 8 | /*output->size[1]*/int oh, 9 | /*output->size[0]*/int ob, 10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 11 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 16 | /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw, 17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 18 | 19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc, 20 | /*gradOutput->size[2]*/int gow, 21 | /*gradOutput->size[1]*/int goh, 22 | /*gradOutput->size[0]*/int gob, 23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 26 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 27 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 28 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 29 | /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw, 30 | /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw, 31 | /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw, 32 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 33 | 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_pooling/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/build.py: 
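As with the other extensions, _import_symbols simply re-exports every callable from the compiled ._roi_pooling FFI library, so callers can use the C entry points as ordinary Python functions. Illustrative only, assuming the extension has been built with the build.py that follows:

from model.roi_pooling._ext import roi_pooling

# functions/roi_pool.py (below) then calls, for example,
#   roi_pooling.roi_pooling_forward_cuda(pooled_h, pooled_w, spatial_scale,
#                                        features, rois, output, argmax)
# with output and argmax pre-allocated on the same CUDA device as features.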
-------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | 7 | sources = ['src/roi_pooling.c'] 8 | headers = ['src/roi_pooling.h'] 9 | defines = [] 10 | with_cuda = False 11 | 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/roi_pooling_cuda.c'] 15 | headers += ['src/roi_pooling_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | with_cuda = True 18 | 19 | this_file = os.path.dirname(os.path.realpath(__file__)) 20 | print(this_file) 21 | extra_objects = ['src/roi_pooling.cu.o'] 22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 23 | 24 | ffi = create_extension( 25 | '_ext.roi_pooling', 26 | headers=headers, 27 | sources=sources, 28 | define_macros=defines, 29 | relative_to=__file__, 30 | with_cuda=with_cuda, 31 | extra_objects=extra_objects 32 | ) 33 | 34 | if __name__ == '__main__': 35 | ffi.build() 36 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_pooling/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_pooling 4 | import pdb 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(ctx, pooled_height, pooled_width, spatial_scale): 8 | ctx.pooled_width = pooled_width 9 | ctx.pooled_height = pooled_height 10 | ctx.spatial_scale = spatial_scale 11 | ctx.feature_size = None 12 | 13 | def forward(ctx, features, rois): 14 | ctx.feature_size = features.size() 15 | batch_size, num_channels, data_height, data_width = ctx.feature_size 16 | num_rois = rois.size(0) 17 | output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_() 18 | ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int() 19 | ctx.rois = rois 20 | if not features.is_cuda: 21 | _features = features.permute(0, 2, 3, 1) 22 | roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 23 | _features, rois, output) 24 | else: 25 | roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 26 | features, rois, output, ctx.argmax) 27 | 28 | return output 29 | 30 | def backward(ctx, grad_output): 31 | assert(ctx.feature_size is not None and grad_output.is_cuda) 32 | batch_size, num_channels, data_height, data_width = ctx.feature_size 33 | grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_() 34 | 35 | roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 36 | grad_output, ctx.rois, grad_input, ctx.argmax) 37 | 38 | return grad_input, None 39 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- 
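RoIPoolFunction mirrors the classic ROIPool op: the forward pass records, for every output cell, the argmax position inside its RoI bin so the CUDA backward can route gradients only to those winning inputs. A minimal sketch using the _RoIPooling module defined just below (illustrative; it assumes the built CUDA extension and that lib/ is importable):

import torch
from torch.autograd import Variable
from model.roi_pooling.modules.roi_pool import _RoIPooling

features = Variable(torch.randn(1, 256, 50, 50).cuda(), requires_grad=True)
rois = Variable(torch.cuda.FloatTensor([[0, 0.0, 0.0, 160.0, 160.0]]))  # [batch_idx, x1, y1, x2, y2]

pool = _RoIPooling(pooled_height=7, pooled_width=7, spatial_scale=1.0 / 16.0)
out = pool(features, rois)   # -> 1 x 256 x 7 x 7
out.sum().backward()         # gradients flow only into the per-bin argmax locations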
https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/roi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import RoIPoolFunction 3 | 4 | 5 | class _RoIPooling(Module): 6 | def __init__(self, pooled_height, pooled_width, spatial_scale): 7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 15 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Set all element of the output tensor to -inf. 
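/* Note: the fill below uses -1 rather than -inf as the comment above suggests.
 * Empty bins are later overwritten with 0, and a non-empty bin keeps -1 only if
 * every feature value in it is below -1, so the difference from -inf rarely
 * matters in practice.
 *
 * Worked example of the bin arithmetic (illustrative numbers): with
 * spatial_scale = 1/16, an RoI (x1, y1, x2, y2) = (0, 0, 96, 96) maps to a
 * 7x7 window on the feature map (roi_width = roi_height = 6 - 0 + 1 = 7), so
 * for pooled_height = pooled_width = 7 each bin covers exactly one cell:
 * bin_size_h = bin_size_w = 7 / 7 = 1.0. */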
30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output += pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = 
THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = THCudaTensor_size(state, features, 0); 27 | // if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const 
int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /lib/model/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/model/utils/__init__.py -------------------------------------------------------------------------------- /lib/model/utils/net_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | import torchvision.models as models 7 | from core.config import cfg 8 | from model.roi_crop.functions.roi_crop import RoICropFunction 9 | import cv2 10 | import pdb 11 | import random 12 | 13 | def save_net(fname, net): 14 | import h5py 15 | h5f = h5py.File(fname, mode='w') 16 | for k, v in net.state_dict().items(): 17 | h5f.create_dataset(k, data=v.cpu().numpy()) 18 | 19 | def load_net(fname, net): 20 | import h5py 21 | h5f = h5py.File(fname, mode='r') 22 | for k, v in net.state_dict().items(): 23 | param = torch.from_numpy(np.asarray(h5f[k])) 24 | v.copy_(param) 25 | 26 | def weights_normal_init(model, dev=0.01): 27 | if isinstance(model, list): 28 | for m in model: 29 | weights_normal_init(m, dev) 30 | else: 31 | for m in model.modules(): 32 | if isinstance(m, nn.Conv2d): 33 | m.weight.data.normal_(0.0, dev) 34 | elif isinstance(m, nn.Linear): 35 | m.weight.data.normal_(0.0, dev) 36 | 37 | 38 | def _crop_pool_layer(bottom, rois, max_pool=True): 39 | # code modified from 40 | # https://github.com/ruotianluo/pytorch-faster-rcnn 41 | # implement it using stn 42 | # box to affine 43 | # input (x1,y1,x2,y2) 44 | """ 45 | [ x2-x1 x1 + x2 - W + 1 ] 46 | [ ----- 0 --------------- ] 47 | [ W - 1 W - 1 ] 48 | [ ] 49 | [ y2-y1 y1 + y2 - H + 1 ] 50 | [ 0 ----- --------------- ] 51 | [ H - 1 H - 1 ] 52 | """ 53 | rois = rois.detach() 54 | batch_size = bottom.size(0) 55 | D = bottom.size(1) 56 | H = bottom.size(2) 57 | W = bottom.size(3) 58 | roi_per_batch = rois.size(0) / batch_size 59 | x1 = rois[:, 1::4] / 16.0 60 | y1 = rois[:, 2::4] / 16.0 61 | x2 = rois[:, 3::4] / 16.0 62 | y2 = rois[:, 4::4] / 16.0 63 | 64 | height = bottom.size(2) 65 | width = bottom.size(3) 66 | 67 | # affine theta 68 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 69 | theta = torch.cat([\ 70 | (x2 - x1) / (width - 1), 71 | zero, 72 | (x1 + x2 - width + 1) / (width - 1), 73 | zero, 74 | (y2 - y1) / (height - 1), 75 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 76 | 77 | if max_pool: 78 | pre_pool_size = cfg.POOLING_SIZE * 2 79 | grid = 
F.affine_grid(theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size))) 80 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\ 81 | .contiguous().view(-1, D, H, W) 82 | crops = F.grid_sample(bottom, grid) 83 | crops = F.max_pool2d(crops, 2, 2) 84 | else: 85 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, cfg.POOLING_SIZE, cfg.POOLING_SIZE))) 86 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\ 87 | .contiguous().view(-1, D, H, W) 88 | crops = F.grid_sample(bottom, grid) 89 | 90 | return crops, grid 91 | 92 | def _affine_grid_gen(rois, input_size, grid_size): 93 | 94 | rois = rois.detach() 95 | x1 = rois[:, 1::4] / 16.0 96 | y1 = rois[:, 2::4] / 16.0 97 | x2 = rois[:, 3::4] / 16.0 98 | y2 = rois[:, 4::4] / 16.0 99 | 100 | height = input_size[0] 101 | width = input_size[1] 102 | 103 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 104 | theta = torch.cat([\ 105 | (x2 - x1) / (width - 1), 106 | zero, 107 | (x1 + x2 - width + 1) / (width - 1), 108 | zero, 109 | (y2 - y1) / (height - 1), 110 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 111 | 112 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, grid_size, grid_size))) 113 | 114 | return grid 115 | 116 | def _affine_theta(rois, input_size): 117 | 118 | rois = rois.detach() 119 | x1 = rois[:, 1::4] / 16.0 120 | y1 = rois[:, 2::4] / 16.0 121 | x2 = rois[:, 3::4] / 16.0 122 | y2 = rois[:, 4::4] / 16.0 123 | 124 | height = input_size[0] 125 | width = input_size[1] 126 | 127 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 128 | 129 | # theta = torch.cat([\ 130 | # (x2 - x1) / (width - 1), 131 | # zero, 132 | # (x1 + x2 - width + 1) / (width - 1), 133 | # zero, 134 | # (y2 - y1) / (height - 1), 135 | # (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 136 | 137 | theta = torch.cat([\ 138 | (y2 - y1) / (height - 1), 139 | zero, 140 | (y1 + y2 - height + 1) / (height - 1), 141 | zero, 142 | (x2 - x1) / (width - 1), 143 | (x1 + x2 - width + 1) / (width - 1)], 1).view(-1, 2, 3) 144 | 145 | return theta 146 | 147 | def compare_grid_sample(): 148 | # do gradcheck 149 | N = random.randint(1, 8) 150 | C = 2 # random.randint(1, 8) 151 | H = 5 # random.randint(1, 8) 152 | W = 4 # random.randint(1, 8) 153 | input = Variable(torch.randn(N, C, H, W).cuda(), requires_grad=True) 154 | input_p = input.clone().data.contiguous() 155 | 156 | grid = Variable(torch.randn(N, H, W, 2).cuda(), requires_grad=True) 157 | grid_clone = grid.clone().contiguous() 158 | 159 | out_offcial = F.grid_sample(input, grid) 160 | grad_outputs = Variable(torch.rand(out_offcial.size()).cuda()) 161 | grad_outputs_clone = grad_outputs.clone().contiguous() 162 | grad_inputs = torch.autograd.grad(out_offcial, (input, grid), grad_outputs.contiguous()) 163 | grad_input_off = grad_inputs[0] 164 | 165 | 166 | crf = RoICropFunction() 167 | grid_yx = torch.stack([grid_clone.data[:,:,:,1], grid_clone.data[:,:,:,0]], 3).contiguous().cuda() 168 | out_stn = crf.forward(input_p, grid_yx) 169 | grad_inputs = crf.backward(grad_outputs_clone.data) 170 | grad_input_stn = grad_inputs[0] 171 | pdb.set_trace() 172 | 173 | delta = (grad_input_off.data - grad_input_stn).sum() 174 | -------------------------------------------------------------------------------- /lib/modeling/__init__.py: -------------------------------------------------------------------------------- 
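These helpers implement the spatial-transformer ("crop") flavour of RoI feature extraction: each RoI is converted into a 2x3 affine theta, F.affine_grid turns theta into a normalized sampling grid, and F.grid_sample bilinearly crops the feature map. A minimal sketch of that path (illustrative only; the fixed /16.0 stride and the 7x7 grid are assumptions consistent with the code above):

import torch
import torch.nn.functional as F
from torch.autograd import Variable
from model.utils.net_utils import _affine_grid_gen

feat = Variable(torch.randn(1, 256, 50, 50))                          # stride-16 feature map
rois = Variable(torch.FloatTensor([[0, 32.0, 32.0, 160.0, 160.0]]))   # [batch_idx, x1, y1, x2, y2]

grid = _affine_grid_gen(rois, feat.size()[2:], grid_size=7)           # -> num_rois x 7 x 7 x 2
crops = F.grid_sample(feat, grid)                                     # -> num_rois x 256 x 7 x 7
# (with several RoIs per image the feature map is first expanded along the batch
#  dimension, as _crop_pool_layer does above)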
https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/modeling/__init__.py -------------------------------------------------------------------------------- /lib/modeling/collect_and_distribute_fpn_rpn_proposals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch import nn 3 | 4 | from core.config import cfg 5 | from datasets import json_dataset 6 | import roi_data.fast_rcnn 7 | import utils.blob as blob_utils 8 | import utils.fpn as fpn_utils 9 | 10 | 11 | class CollectAndDistributeFpnRpnProposalsOp(nn.Module): 12 | """Merge RPN proposals generated at multiple FPN levels and then 13 | distribute those proposals to their appropriate FPN levels. An anchor 14 | at one FPN level may predict an RoI that will map to another level, 15 | hence the need to redistribute the proposals. 16 | 17 | This function assumes standard blob names for input and output blobs. 18 | 19 | Input blobs: [rpn_rois_fpn, ..., rpn_rois_fpn, 20 | rpn_roi_probs_fpn, ..., rpn_roi_probs_fpn] 21 | - rpn_rois_fpn are the RPN proposals for FPN level i; see rpn_rois 22 | documentation from GenerateProposals. 23 | - rpn_roi_probs_fpn are the RPN objectness probabilities for FPN 24 | level i; see rpn_roi_probs documentation from GenerateProposals. 25 | 26 | If used during training, then the input blobs will also include: 27 | [roidb, im_info] (see GenerateProposalLabels). 28 | 29 | Output blobs: [rois_fpn, ..., rois_rpn, rois, 30 | rois_idx_restore] 31 | - rois_fpn are the RPN proposals for FPN level i 32 | - rois_idx_restore is a permutation on the concatenation of all 33 | rois_fpn, i=min...max, such that when applied the RPN RoIs are 34 | restored to their original order in the input blobs. 35 | 36 | If used during training, then the output blobs will also include: 37 | [labels, bbox_targets, bbox_inside_weights, bbox_outside_weights]. 38 | """ 39 | def __init__(self): 40 | super().__init__() 41 | 42 | def forward(self, inputs, roidb, im_info): 43 | """ 44 | Args: 45 | inputs: a list of [rpn_rois_fpn2, ..., rpn_rois_fpn6, 46 | rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6] 47 | im_info: [[im_height, im_width, im_scale], ...] 48 | """ 49 | rois = collect(inputs, self.training) 50 | if self.training: 51 | # During training we reuse the data loader code. We populate roidb 52 | # entries on the fly using the rois generated by RPN. 53 | im_scales = im_info.data.numpy()[:, 2] 54 | # For historical consistency with the original Faster R-CNN 55 | # implementation we are *not* filtering crowd proposals. 56 | # This choice should be investigated in the future (it likely does 57 | # not matter). 
58 | json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0) 59 | # Compute training labels for the RPN proposals; also handles 60 | # distributing the proposals over FPN levels 61 | output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names() 62 | blobs = {k: [] for k in output_blob_names} 63 | roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb) 64 | else: 65 | # For inference we have a special code path that avoids some data 66 | # loader overhead 67 | blobs = distribute(rois, None) 68 | 69 | return blobs 70 | 71 | 72 | def collect(inputs, is_training): 73 | cfg_key = 'TRAIN' if is_training else 'TEST' 74 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 75 | k_max = cfg.FPN.RPN_MAX_LEVEL 76 | k_min = cfg.FPN.RPN_MIN_LEVEL 77 | num_lvls = k_max - k_min + 1 78 | roi_inputs = inputs[:num_lvls] 79 | score_inputs = inputs[num_lvls:] 80 | 81 | # rois are in [[batch_idx, x0, y0, x1, y2], ...] format 82 | # Combine predictions across all levels and retain the top scoring 83 | rois = np.concatenate(roi_inputs) 84 | scores = np.concatenate(score_inputs).squeeze() 85 | inds = np.argsort(-scores)[:post_nms_topN] 86 | rois = rois[inds, :] 87 | return rois 88 | 89 | 90 | def distribute(rois, label_blobs): 91 | """To understand the output blob order see return value of 92 | roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=False) 93 | """ 94 | lvl_min = cfg.FPN.ROI_MIN_LEVEL 95 | lvl_max = cfg.FPN.ROI_MAX_LEVEL 96 | lvls = fpn_utils.map_rois_to_fpn_levels(rois[:, 1:5], lvl_min, lvl_max) 97 | 98 | # Delete roi entries that have negative area 99 | idx_neg = np.where(lvls == -1)[0] 100 | rois = np.delete(rois, idx_neg, axis=0) 101 | lvls = np.delete(lvls, idx_neg, axis=0) 102 | 103 | output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=False) 104 | outputs = [None] * len(output_blob_names) 105 | outputs[0] = rois 106 | 107 | # Create new roi blobs for each FPN level 108 | # (See: utils.fpn.add_multilevel_roi_blobs which is similar but annoying 109 | # to generalize to support this particular case.) 
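# Illustrative example (hypothetical values): if lvls = [3, 2, 3, 2] with
# lvl_min = 2, then idx_lvl is [1, 3] for level 2 and [0, 2] for level 3, so
# rois_idx_order becomes [1, 3, 0, 2] and
# rois_idx_restore = argsort([1, 3, 0, 2]) = [2, 0, 3, 1]; indexing the
# concatenated per-level RoIs with this permutation restores their original
# order, which is what the box head downstream expects.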
110 | rois_idx_order = np.empty((0, )) 111 | for output_idx, lvl in enumerate(range(lvl_min, lvl_max + 1)): 112 | idx_lvl = np.where(lvls == lvl)[0] 113 | blob_roi_level = rois[idx_lvl, :] 114 | outputs[output_idx + 1] = blob_roi_level 115 | rois_idx_order = np.concatenate((rois_idx_order, idx_lvl)) 116 | rois_idx_restore = np.argsort(rois_idx_order) 117 | outputs[-1] = rois_idx_restore.astype(np.int32) 118 | 119 | return dict(zip(output_blob_names, outputs)) 120 | -------------------------------------------------------------------------------- /lib/modeling/fast_rcnn_heads.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.nn.init as init 5 | from torch.autograd import Variable 6 | 7 | from core.config import cfg 8 | import utils.net as net_utils 9 | 10 | 11 | class fast_rcnn_outputs(nn.Module): 12 | def __init__(self, dim_in): 13 | super().__init__() 14 | self.cls_score = nn.Linear(dim_in, cfg.MODEL.NUM_CLASSES) 15 | if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG: 16 | self.bbox_pred = nn.Linear(dim_in, 4) 17 | else: 18 | self.bbox_pred = nn.Linear(dim_in, 4 * cfg.MODEL.NUM_CLASSES) 19 | 20 | self._init_weights() 21 | 22 | def _init_weights(self): 23 | init.normal(self.cls_score.weight, std=0.01) 24 | init.constant(self.cls_score.bias, 0) 25 | init.normal(self.bbox_pred.weight, std=0.001) 26 | init.constant(self.bbox_pred.bias, 0) 27 | 28 | def detectron_weight_mapping(self): 29 | detectron_weight_mapping = { 30 | 'cls_score.weight': 'cls_score_w', 31 | 'cls_score.bias': 'cls_score_b', 32 | 'bbox_pred.weight': 'bbox_pred_w', 33 | 'bbox_pred.bias': 'bbox_pred_b' 34 | } 35 | orphan_in_detectron = [] 36 | return detectron_weight_mapping, orphan_in_detectron 37 | 38 | def forward(self, x): 39 | if x.dim() == 4: 40 | x = x.squeeze(3).squeeze(2) 41 | cls_score = self.cls_score(x) 42 | if not self.training: 43 | cls_score = F.softmax(cls_score, dim=1) 44 | bbox_pred = self.bbox_pred(x) 45 | 46 | return cls_score, bbox_pred 47 | 48 | 49 | def fast_rcnn_losses(cls_score, bbox_pred, label_int32, bbox_targets, 50 | bbox_inside_weights, bbox_outside_weights): 51 | device_id = cls_score.get_device() 52 | rois_label = Variable(torch.from_numpy(label_int32.astype('int64'))).cuda(device_id) 53 | loss_cls = F.cross_entropy(cls_score, rois_label) 54 | 55 | bbox_targets = Variable(torch.from_numpy(bbox_targets)).cuda(device_id) 56 | bbox_inside_weights = Variable(torch.from_numpy(bbox_inside_weights)).cuda(device_id) 57 | bbox_outside_weights = Variable(torch.from_numpy(bbox_outside_weights)).cuda(device_id) 58 | loss_bbox = net_utils.smooth_l1_loss( 59 | bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights) 60 | return loss_cls, loss_bbox 61 | 62 | 63 | # ---------------------------------------------------------------------------- # 64 | # Box heads 65 | # ---------------------------------------------------------------------------- # 66 | 67 | class roi_2mlp_head(nn.Module): 68 | """Add a ReLU MLP with two hidden layers.""" 69 | def __init__(self, dim_in, roi_xform_func, spatial_scale): 70 | super().__init__() 71 | self.dim_in = dim_in 72 | self.roi_xform = roi_xform_func 73 | self.spatial_scale = spatial_scale 74 | self.dim_out = hidden_dim = cfg.FAST_RCNN.MLP_HEAD_DIM 75 | 76 | roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION 77 | self.fc1 = nn.Linear(dim_in * roi_size**2, hidden_dim) 78 | self.fc2 = nn.Linear(hidden_dim, hidden_dim) 79 | 80 | self._init_weights() 81 | 82 | 
def _init_weights(self): 83 | init.xavier_uniform(self.fc1.weight) 84 | init.constant(self.fc1.bias, 0) 85 | init.xavier_uniform(self.fc2.weight) 86 | init.constant(self.fc2.bias, 0) 87 | 88 | def detectron_weight_mapping(self): 89 | detectron_weight_mapping = { 90 | 'fc1.weight': 'fc6_w', 91 | 'fc1.bias': 'fc6_b', 92 | 'fc2.weight': 'fc7_w', 93 | 'fc2.bias': 'fc7_b' 94 | } 95 | return detectron_weight_mapping, [] 96 | 97 | def forward(self, x, rpn_ret): 98 | x = self.roi_xform( 99 | x, rpn_ret, 100 | blob_rois='rois', 101 | method=cfg.FAST_RCNN.ROI_XFORM_METHOD, 102 | resolution=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION, 103 | spatial_scale=self.spatial_scale, 104 | sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 105 | ) 106 | batch_size = x.size(0) 107 | x = F.relu(self.fc1(x.view(batch_size, -1)), inplace=True) 108 | x = F.relu(self.fc2(x), inplace=True) 109 | 110 | return x 111 | -------------------------------------------------------------------------------- /lib/modeling/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Faster R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Ross Girshick and Sean Bell 22 | # -------------------------------------------------------- 23 | 24 | import numpy as np 25 | 26 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 27 | # 28 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 29 | # >> anchors 30 | # 31 | # anchors = 32 | # 33 | # -83 -39 100 56 34 | # -175 -87 192 104 35 | # -359 -183 376 200 36 | # -55 -55 72 72 37 | # -119 -119 136 136 38 | # -247 -247 264 264 39 | # -35 -79 52 96 40 | # -79 -167 96 184 41 | # -167 -343 184 360 42 | 43 | # array([[ -83., -39., 100., 56.], 44 | # [-175., -87., 192., 104.], 45 | # [-359., -183., 376., 200.], 46 | # [ -55., -55., 72., 72.], 47 | # [-119., -119., 136., 136.], 48 | # [-247., -247., 264., 264.], 49 | # [ -35., -79., 52., 96.], 50 | # [ -79., -167., 96., 184.], 51 | # [-167., -343., 184., 360.]]) 52 | 53 | 54 | def generate_anchors( 55 | stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2) 56 | ): 57 | """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors 58 | are centered on stride / 2, have (approximate) sqrt areas of the specified 59 | sizes, and aspect ratios as given. 
60 | """ 61 | return _generate_anchors( 62 | stride, 63 | np.array(sizes, dtype=np.float) / stride, 64 | np.array(aspect_ratios, dtype=np.float) 65 | ) 66 | 67 | 68 | def _generate_anchors(base_size, scales, aspect_ratios): 69 | """Generate anchor (reference) windows by enumerating aspect ratios X 70 | scales wrt a reference (0, 0, base_size - 1, base_size - 1) window. 71 | """ 72 | anchor = np.array([1, 1, base_size, base_size], dtype=np.float) - 1 73 | anchors = _ratio_enum(anchor, aspect_ratios) 74 | anchors = np.vstack( 75 | [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])] 76 | ) 77 | return anchors 78 | 79 | 80 | def _whctrs(anchor): 81 | """Return width, height, x center, and y center for an anchor (window).""" 82 | w = anchor[2] - anchor[0] + 1 83 | h = anchor[3] - anchor[1] + 1 84 | x_ctr = anchor[0] + 0.5 * (w - 1) 85 | y_ctr = anchor[1] + 0.5 * (h - 1) 86 | return w, h, x_ctr, y_ctr 87 | 88 | 89 | def _mkanchors(ws, hs, x_ctr, y_ctr): 90 | """Given a vector of widths (ws) and heights (hs) around a center 91 | (x_ctr, y_ctr), output a set of anchors (windows). 92 | """ 93 | ws = ws[:, np.newaxis] 94 | hs = hs[:, np.newaxis] 95 | anchors = np.hstack( 96 | ( 97 | x_ctr - 0.5 * (ws - 1), 98 | y_ctr - 0.5 * (hs - 1), 99 | x_ctr + 0.5 * (ws - 1), 100 | y_ctr + 0.5 * (hs - 1) 101 | ) 102 | ) 103 | return anchors 104 | 105 | 106 | def _ratio_enum(anchor, ratios): 107 | """Enumerate a set of anchors for each aspect ratio wrt an anchor.""" 108 | w, h, x_ctr, y_ctr = _whctrs(anchor) 109 | size = w * h 110 | size_ratios = size / ratios 111 | ws = np.round(np.sqrt(size_ratios)) 112 | hs = np.round(ws * ratios) 113 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 114 | return anchors 115 | 116 | 117 | def _scale_enum(anchor, scales): 118 | """Enumerate a set of anchors for each scale wrt an anchor.""" 119 | w, h, x_ctr, y_ctr = _whctrs(anchor) 120 | ws = w * scales 121 | hs = h * scales 122 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 123 | return anchors 124 | -------------------------------------------------------------------------------- /lib/modeling/generate_proposal_labels.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from core.config import cfg 4 | from datasets import json_dataset 5 | import roi_data.fast_rcnn 6 | 7 | 8 | class GenerateProposalLabelsOp(nn.Module): 9 | def __init__(self): 10 | super().__init__() 11 | 12 | def forward(self, rpn_rois, roidb, im_info): 13 | """Op for generating training labels for RPN proposals. This is used 14 | when training RPN jointly with Fast/Mask R-CNN (as in end-to-end 15 | Faster R-CNN training). 16 | 17 | blobs_in: 18 | - 'rpn_rois': 2D tensor of RPN proposals output by GenerateProposals 19 | - 'roidb': roidb entries that will be labeled 20 | - 'im_info': See GenerateProposals doc. 21 | 22 | blobs_out: 23 | - (variable set of blobs): returns whatever blobs are required for 24 | training the model. It does this by querying the data loader for 25 | the list of blobs that are needed. 26 | """ 27 | im_scales = im_info.data.numpy()[:, 2] 28 | 29 | output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names() 30 | # For historical consistency with the original Faster R-CNN 31 | # implementation we are *not* filtering crowd proposals. 32 | # This choice should be investigated in the future (it likely does 33 | # not matter). 
34 | # Note: crowd_thresh=0 will ignore _filter_crowd_proposals 35 | json_dataset.add_proposals(roidb, rpn_rois, im_scales, crowd_thresh=0) 36 | blobs = {k: [] for k in output_blob_names} 37 | roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb) 38 | 39 | return blobs 40 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/modeling/roi_xfrom/__init__.py -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/modeling/roi_xfrom/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/modeling/roi_xfrom/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | # sources = ['src/roi_align.c'] 7 | # headers = ['src/roi_align.h'] 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_align_cuda.c'] 16 | headers += ['src/roi_align_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_align_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_align', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/modeling/roi_xfrom/roi_align/functions/__init__.py 
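This second copy of the RoIAlign extension under modeling/roi_xfrom is CUDA-only (the CPU sources are commented out of build.py above), and the files that follow add a sampling_ratio argument controlling how many bilinear sample points are taken per output bin. A minimal construction sketch (illustrative; the value 2 is an assumed sampling ratio, not one taken from this repository's configs):

from modeling.roi_xfrom.roi_align.modules.roi_align import RoIAlign

# features_gpu / rois_gpu stand in for CUDA tensors shaped as in the earlier RoIAlign example.
roi_align_op = RoIAlign(aligned_height=7, aligned_width=7,
                        spatial_scale=1.0 / 16.0, sampling_ratio=2)
pooled = roi_align_op(features_gpu, rois_gpu)   # -> num_rois x C x 7 x 7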
-------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_align 4 | 5 | 6 | # TODO use save_for_backward instead 7 | class RoIAlignFunction(Function): 8 | def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio): 9 | self.aligned_width = int(aligned_width) 10 | self.aligned_height = int(aligned_height) 11 | self.spatial_scale = float(spatial_scale) 12 | self.sampling_ratio = int(sampling_ratio) 13 | self.rois = None 14 | self.feature_size = None 15 | 16 | def forward(self, features, rois): 17 | self.rois = rois 18 | self.feature_size = features.size() 19 | 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_() 24 | if features.is_cuda: 25 | roi_align.roi_align_forward_cuda(self.aligned_height, 26 | self.aligned_width, 27 | self.spatial_scale, self.sampling_ratio, features, 28 | rois, output) 29 | else: 30 | raise NotImplementedError 31 | 32 | return output 33 | 34 | def backward(self, grad_output): 35 | assert(self.feature_size is not None and grad_output.is_cuda) 36 | 37 | batch_size, num_channels, data_height, data_width = self.feature_size 38 | 39 | grad_input = self.rois.new(batch_size, num_channels, data_height, 40 | data_width).zero_() 41 | roi_align.roi_align_backward_cuda(self.aligned_height, 42 | self.aligned_width, 43 | self.spatial_scale, self.sampling_ratio, grad_output, 44 | self.rois, grad_input) 45 | 46 | # print grad_input 47 | 48 | return grad_input, None 49 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling my_lib kernels by nvcc..." 
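# Note: -arch=sm_61 on the next line targets Pascal GPUs (e.g. GTX 10xx); if you
# build on different hardware, adjust it accordingly (for instance sm_52 for
# Maxwell or sm_70 for Volta). This is general CUDA build guidance, not a
# constraint imposed by this script.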
7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/modeling/roi_xfrom/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | self.sampling_ratio = int(sampling_ratio) 14 | 15 | def forward(self, features, rois): 16 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 17 | self.spatial_scale, self.sampling_ratio)(features, rois) 18 | 19 | class RoIAlignAvg(Module): 20 | def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio): 21 | super(RoIAlignAvg, self).__init__() 22 | 23 | self.aligned_width = int(aligned_width) 24 | self.aligned_height = int(aligned_height) 25 | self.spatial_scale = float(spatial_scale) 26 | self.sampling_ratio = int(sampling_ratio) 27 | 28 | def forward(self, features, rois): 29 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 30 | self.spatial_scale, self.sampling_ratio)(features, rois) 31 | return avg_pool2d(x, kernel_size=2, stride=1) 32 | 33 | class RoIAlignMax(Module): 34 | def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio): 35 | super(RoIAlignMax, self).__init__() 36 | 37 | self.aligned_width = int(aligned_width) 38 | self.aligned_height = int(aligned_height) 39 | self.spatial_scale = float(spatial_scale) 40 | self.sampling_ratio = int(sampling_ratio) 41 | 42 | def forward(self, features, rois): 43 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 44 | self.spatial_scale, self.sampling_ratio)(features, rois) 45 | return max_pool2d(x, kernel_size=2, stride=1) 46 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // data height 25 | int data_height = 
THCudaTensor_size(state, features, 2); 26 | // data width 27 | int data_width = THCudaTensor_size(state, features, 3); 28 | // Number of channels 29 | int num_channels = THCudaTensor_size(state, features, 1); 30 | 31 | cudaStream_t stream = THCState_getCurrentStream(state); 32 | 33 | ROIAlignForwardLaucher( 34 | data_flat, spatial_scale, num_rois, data_height, 35 | data_width, num_channels, aligned_height, 36 | aligned_width, sampling_ratio, rois_flat, 37 | output_flat, stream); 38 | 39 | return 1; 40 | } 41 | 42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width, num_channels, aligned_height, 72 | aligned_width, sampling_ratio, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /lib/modeling/roi_xfrom/roi_align/src/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, const int sampling_ratio, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const int sampling_ratio, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, 
const int aligned_height, const int aligned_width, const int sampling_ratio, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const int sampling_ratio, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /lib/modeling/rpn_heads.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import init 3 | import torch.nn.functional as F 4 | 5 | from core.config import cfg 6 | from modeling.generate_anchors import generate_anchors 7 | from modeling.generate_proposals import GenerateProposalsOp 8 | from modeling.generate_proposal_labels import GenerateProposalLabelsOp 9 | import modeling.FPN as FPN 10 | import utils.net as net_utils 11 | 12 | 13 | # ---------------------------------------------------------------------------- # 14 | # RPN and Faster R-CNN outputs and losses 15 | # ---------------------------------------------------------------------------- # 16 | 17 | def generic_rpn_outputs(dim_in, spatial_scale_in): 18 | """Add RPN outputs (objectness classification and bounding box regression) 19 | to an RPN model. Abstracts away the use of FPN. 20 | """ 21 | if cfg.FPN.FPN_ON: 22 | # Delegate to the FPN module 23 | return FPN.fpn_rpn_outputs(dim_in, spatial_scale_in) 24 | else: 25 | # Not using FPN, add RPN to a single scale 26 | return single_scale_rpn_outputs(dim_in, spatial_scale_in) 27 | 28 | 29 | def generic_rpn_losses(*inputs, **kwargs): 30 | """Add RPN losses. Abstracts away the use of FPN.""" 31 | if cfg.FPN.FPN_ON: 32 | return FPN.fpn_rpn_losses(*inputs, **kwargs) 33 | else: 34 | return single_scale_rpn_losses(*inputs, **kwargs) 35 | 36 | 37 | class single_scale_rpn_outputs(nn.Module): 38 | """Add RPN outputs to a single scale model (i.e., no FPN).""" 39 | def __init__(self, dim_in, spatial_scale): 40 | super().__init__() 41 | self.dim_in = dim_in 42 | self.dim_out = dim_in if cfg.RPN.OUT_DIM_AS_IN_DIM else cfg.RPN.OUT_DIM 43 | anchors = generate_anchors( 44 | stride=1. 
/ spatial_scale, 45 | sizes=cfg.RPN.SIZES, 46 | aspect_ratios=cfg.RPN.ASPECT_RATIOS) 47 | num_anchors = anchors.shape[0] 48 | 49 | # RPN hidden representation 50 | self.RPN_conv = nn.Conv2d(self.dim_in, self.dim_out, 3, 1, 1) 51 | # Proposal classification scores 52 | self.n_score_out = num_anchors * 2 if cfg.RPN.CLS_ACTIVATION == 'softmax' \ 53 | else num_anchors 54 | self.RPN_cls_score = nn.Conv2d(self.dim_out, self.n_score_out, 1, 1, 0) 55 | # Proposal bbox regression deltas 56 | self.RPN_bbox_pred = nn.Conv2d(self.dim_out, num_anchors * 4, 1, 1, 0) 57 | 58 | self.RPN_GenerateProposals = GenerateProposalsOp(anchors, spatial_scale) 59 | self.RPN_GenerateProposalLabels = GenerateProposalLabelsOp() 60 | 61 | self._init_weights() 62 | 63 | def _init_weights(self): 64 | init.normal(self.RPN_conv.weight, std=0.01) 65 | init.constant(self.RPN_conv.bias, 0) 66 | init.normal(self.RPN_cls_score.weight, std=0.01) 67 | init.constant(self.RPN_cls_score.bias, 0) 68 | init.normal(self.RPN_bbox_pred.weight, std=0.01) 69 | init.constant(self.RPN_bbox_pred.bias, 0) 70 | 71 | def detectron_weight_mapping(self): 72 | detectron_weight_mapping = { 73 | 'RPN_conv.weight': 'conv_rpn_w', 74 | 'RPN_conv.bias': 'conv_rpn_b', 75 | 'RPN_cls_score.weight': 'rpn_cls_logits_w', 76 | 'RPN_cls_score.bias': 'rpn_cls_logits_b', 77 | 'RPN_bbox_pred.weight': 'rpn_bbox_pred_w', 78 | 'RPN_bbox_pred.bias': 'rpn_bbox_pred_b' 79 | } 80 | orphan_in_detectron = [] 81 | return detectron_weight_mapping, orphan_in_detectron 82 | 83 | def forward(self, x, im_info, roidb=None): 84 | """ 85 | x: feature maps from the backbone network. (Variable) 86 | im_info: (CPU Variable) 87 | roidb: (list of ndarray) 88 | """ 89 | rpn_conv = F.relu(self.RPN_conv(x), inplace=True) 90 | 91 | rpn_cls_logits = self.RPN_cls_score(rpn_conv) 92 | 93 | rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv) 94 | 95 | return_dict = { 96 | 'rpn_cls_logits': rpn_cls_logits, 'rpn_bbox_pred': rpn_bbox_pred} 97 | 98 | if not self.training or cfg.MODEL.FASTER_RCNN: 99 | # Proposals are needed during: 100 | # 1) inference (== not model.train) for RPN only and Faster R-CNN 101 | # OR 102 | # 2) training for Faster R-CNN 103 | # Otherwise (== training for RPN only), proposals are not needed 104 | if cfg.RPN.CLS_ACTIVATION == 'softmax': 105 | B, C, H, W = rpn_cls_logits.size() 106 | rpn_cls_prob = F.softmax( 107 | rpn_cls_logits.view(B, 2, C // 2, H, W), dim=1).view( 108 | B, C, H, W) 109 | rpn_cls_prob = rpn_cls_prob[:, C // 2:].contiguous() 110 | else: 111 | rpn_cls_prob = F.sigmoid(rpn_cls_logits) 112 | 113 | rpn_rois, rpn_rois_prob = self.RPN_GenerateProposals( 114 | rpn_cls_prob, rpn_bbox_pred, im_info) 115 | 116 | return_dict['rpn_rois'] = rpn_rois 117 | return_dict['rpn_roi_probs'] = rpn_rois_prob 118 | 119 | if cfg.MODEL.FASTER_RCNN: 120 | if self.training: 121 | # Add op that generates training labels for in-network RPN proposals 122 | blobs_out = self.RPN_GenerateProposalLabels(rpn_rois, roidb, im_info) 123 | return_dict.update(blobs_out) 124 | else: 125 | # Alias rois to rpn_rois for inference 126 | return_dict['rois'] = return_dict['rpn_rois'] 127 | 128 | return return_dict 129 | 130 | 131 | def single_scale_rpn_losses( 132 | rpn_cls_logits, rpn_bbox_pred, 133 | rpn_labels_int32_wide, rpn_bbox_targets_wide, 134 | rpn_bbox_inside_weights_wide, rpn_bbox_outside_weights_wide): 135 | """Add losses for a single scale RPN model (i.e., no FPN).""" 136 | h, w = rpn_cls_logits.shape[2:] 137 | rpn_labels_int32 = rpn_labels_int32_wide[:, :, :h, :w] # -1 means ignore 138 | h, 
w = rpn_bbox_pred.shape[2:] 139 | rpn_bbox_targets = rpn_bbox_targets_wide[:, :, :h, :w] 140 | rpn_bbox_inside_weights = rpn_bbox_inside_weights_wide[:, :, :h, :w] 141 | rpn_bbox_outside_weights = rpn_bbox_outside_weights_wide[:, :, :h, :w] 142 | 143 | if cfg.RPN.CLS_ACTIVATION == 'softmax': 144 | B, C, H, W = rpn_cls_logits.size() 145 | rpn_cls_logits = rpn_cls_logits.view(B, 2, C / 2, H, W).permute(0, 2, 3, 4, 1).view(-1, 2) 146 | rpn_labels_int32 = rpn_labels_int32.view(-1).long() 147 | loss_rpn_cls = F.cross_entropy(rpn_cls_logits, rpn_labels_int32, ignore_index=-1, size_average=False) 148 | loss_rpn_cls /= (rpn_labels_int32 >= 0).sum().float() 149 | else: 150 | weight = (rpn_labels_int32 >= 0).float() 151 | loss_rpn_cls = F.binary_cross_entropy_with_logits( 152 | rpn_cls_logits, rpn_labels_int32.float(), weight, size_average=False) 153 | loss_rpn_cls /= weight.sum() 154 | 155 | loss_rpn_bbox = net_utils.smooth_l1_loss( 156 | rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights, 157 | beta=1/9) 158 | 159 | return loss_rpn_cls, loss_rpn_bbox 160 | -------------------------------------------------------------------------------- /lib/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules import * 2 | from .parallel import DataParallel -------------------------------------------------------------------------------- /lib/nn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .affine import AffineChannel2d 2 | from .upsample import BilinearInterpolation2d 3 | -------------------------------------------------------------------------------- /lib/nn/modules/affine.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class AffineChannel2d(nn.Module): 6 | """ A simple channel-wise affine transformation operation """ 7 | def __init__(self, num_features): 8 | super().__init__() 9 | self.num_features = num_features 10 | self.weight = nn.Parameter(torch.Tensor(num_features)) 11 | self.bias = nn.Parameter(torch.Tensor(num_features)) 12 | self.weight.data.uniform_() 13 | self.bias.data.zero_() 14 | 15 | def forward(self, x): 16 | return x * self.weight.view(1, self.num_features, 1, 1) + \ 17 | self.bias.view(1, self.num_features, 1, 1) 18 | -------------------------------------------------------------------------------- /lib/nn/modules/upsample.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | 8 | 9 | class BilinearInterpolation2d(nn.Module): 10 | """Bilinear interpolation in space of scale. 11 | 12 | Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s:= up_scale 13 | 14 | Adapted from the CVPR'15 FCN code. 
15 | See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py 16 | """ 17 | def __init__(self, in_channels, out_channels, up_scale): 18 | super().__init__() 19 | assert in_channels == out_channels 20 | assert up_scale % 2 == 0, 'Scale should be even' 21 | self.in_channes = in_channels 22 | self.out_channels = out_channels 23 | self.up_scale = int(up_scale) 24 | self.padding = up_scale // 2 25 | 26 | def upsample_filt(size): 27 | factor = (size + 1) // 2 28 | if size % 2 == 1: 29 | center = factor - 1 30 | else: 31 | center = factor - 0.5 32 | og = np.ogrid[:size, :size] 33 | return ((1 - abs(og[0] - center) / factor) * 34 | (1 - abs(og[1] - center) / factor)) 35 | 36 | kernel_size = up_scale * 2 37 | bil_filt = upsample_filt(kernel_size) 38 | 39 | kernel = np.zeros( 40 | (in_channels, out_channels, kernel_size, kernel_size), dtype=np.float32 41 | ) 42 | kernel[range(in_channels), range(out_channels), :, :] = bil_filt 43 | 44 | self.upconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, 45 | stride=self.up_scale, padding=self.padding) 46 | 47 | self.upconv.weight.data.copy_(torch.from_numpy(kernel)) 48 | self.upconv.bias.data.fill_(0) 49 | self.upconv.weight.requires_grad = False 50 | self.upconv.bias.requires_grad = False 51 | 52 | def forward(self, x): 53 | return self.upconv(x) 54 | -------------------------------------------------------------------------------- /lib/nn/parallel/__init__.py: -------------------------------------------------------------------------------- 1 | from .parallel_apply import parallel_apply 2 | from .replicate import replicate 3 | from .data_parallel import DataParallel, data_parallel 4 | from .scatter_gather import scatter, gather 5 | 6 | __all__ = ['replicate', 'scatter', 'parallel_apply', 'gather', 'data_parallel', 7 | 'DataParallel'] 8 | -------------------------------------------------------------------------------- /lib/nn/parallel/_functions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.cuda.comm as comm 3 | from torch.autograd import Function 4 | 5 | 6 | class Broadcast(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, target_gpus, *inputs): 10 | if not all(input.is_cuda for input in inputs): 11 | raise TypeError('Broadcast function not implemented for CPU tensors') 12 | ctx.target_gpus = target_gpus 13 | if len(inputs) == 0: 14 | return tuple() 15 | ctx.num_inputs = len(inputs) 16 | ctx.input_device = inputs[0].get_device() 17 | outputs = comm.broadcast_coalesced(inputs, ctx.target_gpus) 18 | non_differentiables = [] 19 | for idx, input_requires_grad in enumerate(ctx.needs_input_grad[1:]): 20 | if not input_requires_grad: 21 | for output in outputs: 22 | non_differentiables.append(output[idx]) 23 | ctx.mark_non_differentiable(*non_differentiables) 24 | return tuple([t for tensors in outputs for t in tensors]) 25 | 26 | @staticmethod 27 | def backward(ctx, *grad_outputs): 28 | return (None,) + ReduceAddCoalesced.apply(ctx.input_device, ctx.num_inputs, *grad_outputs) 29 | 30 | 31 | class ReduceAddCoalesced(Function): 32 | 33 | @staticmethod 34 | def forward(ctx, destination, num_inputs, *grads): 35 | ctx.target_gpus = [grads[i].get_device() for i in range(0, len(grads), num_inputs)] 36 | 37 | grads = [grads[i:i + num_inputs] 38 | for i in range(0, len(grads), num_inputs)] 39 | return comm.reduce_add_coalesced(grads, destination) 40 | 41 | @staticmethod 42 | def backward(ctx, *grad_outputs): 43 | return (None, None,) + 
Broadcast.apply(ctx.target_gpus, *grad_outputs) 44 | 45 | 46 | class Gather(Function): 47 | 48 | @staticmethod 49 | def forward(ctx, target_device, dim, *inputs): 50 | assert all(map(lambda i: i.is_cuda, inputs)) 51 | ctx.target_device = target_device 52 | ctx.dim = dim 53 | ctx.input_gpus = tuple(map(lambda i: i.get_device(), inputs)) 54 | ctx.input_sizes = tuple(map(lambda i: i.size(ctx.dim), inputs)) 55 | return comm.gather(inputs, ctx.dim, ctx.target_device) 56 | 57 | @staticmethod 58 | def backward(ctx, grad_output): 59 | return (None, None) + Scatter.apply(ctx.input_gpus, ctx.input_sizes, ctx.dim, grad_output) 60 | 61 | 62 | class Scatter(Function): 63 | 64 | @staticmethod 65 | def forward(ctx, target_gpus, chunk_sizes, dim, input): 66 | ctx.target_gpus = target_gpus 67 | ctx.chunk_sizes = chunk_sizes 68 | ctx.dim = dim 69 | ctx.input_device = input.get_device() if input.is_cuda else -1 70 | streams = None 71 | if ctx.input_device == -1: 72 | # Perform CPU to GPU copies in a background stream 73 | streams = [_get_stream(device) for device in ctx.target_gpus] 74 | outputs = comm.scatter(input, ctx.target_gpus, ctx.chunk_sizes, ctx.dim, streams) 75 | # Synchronize with the copy stream 76 | if streams is not None: 77 | for i, output in enumerate(outputs): 78 | with torch.cuda.device(ctx.target_gpus[i]): 79 | main_stream = torch.cuda.current_stream() 80 | main_stream.wait_stream(streams[i]) 81 | output.record_stream(main_stream) 82 | return outputs 83 | 84 | @staticmethod 85 | def backward(ctx, *grad_output): 86 | return None, None, None, Gather.apply(ctx.input_device, ctx.dim, *grad_output) 87 | 88 | 89 | # background streams used for copying 90 | _streams = None 91 | 92 | 93 | def _get_stream(device): 94 | """Gets a background stream for copying between CPU and GPU""" 95 | global _streams 96 | if device == -1: 97 | return None 98 | if _streams is None: 99 | _streams = [None] * torch.cuda.device_count() 100 | if _streams[device] is None: 101 | _streams[device] = torch.cuda.Stream(device) 102 | return _streams[device] 103 | -------------------------------------------------------------------------------- /lib/nn/parallel/parallel_apply.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import torch 3 | from torch.autograd import Variable 4 | 5 | 6 | def get_a_var(obj): 7 | if isinstance(obj, Variable): 8 | return obj 9 | 10 | if isinstance(obj, list) or isinstance(obj, tuple): 11 | results = map(get_a_var, obj) 12 | for result in results: 13 | if isinstance(result, Variable): 14 | return result 15 | if isinstance(obj, dict): 16 | results = map(get_a_var, obj.items()) 17 | for result in results: 18 | if isinstance(result, Variable): 19 | return result 20 | return None 21 | 22 | 23 | def parallel_apply(modules, inputs, kwargs_tup=None, devices=None): 24 | assert len(modules) == len(inputs) 25 | if kwargs_tup is not None: 26 | assert len(modules) == len(kwargs_tup) 27 | else: 28 | kwargs_tup = ({},) * len(modules) 29 | if devices is not None: 30 | assert len(modules) == len(devices) 31 | else: 32 | devices = [None] * len(modules) 33 | 34 | lock = threading.Lock() 35 | results = {} 36 | 37 | def _worker(i, module, input, kwargs, results, lock, device=None): 38 | if device is None: 39 | device = get_a_var(input).get_device() 40 | try: 41 | with torch.cuda.device(device): 42 | output = module(*input, **kwargs) 43 | with lock: 44 | results[i] = output 45 | except Exception as e: 46 | with lock: 47 | results[i] = e 48 | 49 | if 
len(modules) > 1: 50 | threads = [threading.Thread(target=_worker, 51 | args=(i, module, input, kwargs, results, lock, device), 52 | ) 53 | for i, (module, input, kwargs, device) in 54 | enumerate(zip(modules, inputs, kwargs_tup, devices))] 55 | 56 | for thread in threads: 57 | thread.start() 58 | for thread in threads: 59 | thread.join() 60 | else: 61 | _worker(0, modules[0], inputs[0], kwargs_tup[0], results, lock, devices[0]) 62 | 63 | outputs = [] 64 | for i in range(len(inputs)): 65 | output = results[i] 66 | if isinstance(output, Exception): 67 | raise output 68 | outputs.append(output) 69 | return outputs 70 | -------------------------------------------------------------------------------- /lib/nn/parallel/replicate.py: -------------------------------------------------------------------------------- 1 | import torch.cuda.comm as comm 2 | 3 | 4 | def replicate(network, devices): 5 | from ._functions import Broadcast 6 | 7 | devices = tuple(devices) 8 | num_replicas = len(devices) 9 | 10 | params = list(network.parameters()) 11 | param_indices = {param: idx for idx, param in enumerate(params)} 12 | param_copies = Broadcast.apply(devices, *params) 13 | if len(params) > 0: 14 | param_copies = [param_copies[i:i + len(params)] 15 | for i in range(0, len(param_copies), len(params))] 16 | 17 | buffers = list(network._all_buffers()) 18 | buffer_indices = {buf: idx for idx, buf in enumerate(buffers)} 19 | buffer_copies = comm.broadcast_coalesced(buffers, devices) 20 | 21 | modules = list(network.modules()) 22 | module_copies = [[] for device in devices] 23 | module_indices = {} 24 | 25 | for i, module in enumerate(modules): 26 | module_indices[module] = i 27 | for j in range(num_replicas): 28 | replica = module.__new__(type(module)) 29 | replica.__dict__ = module.__dict__.copy() 30 | replica._parameters = replica._parameters.copy() 31 | replica._buffers = replica._buffers.copy() 32 | replica._modules = replica._modules.copy() 33 | module_copies[j].append(replica) 34 | 35 | for i, module in enumerate(modules): 36 | for key, child in module._modules.items(): 37 | if child is None: 38 | for j in range(num_replicas): 39 | replica = module_copies[j][i] 40 | replica._modules[key] = None 41 | else: 42 | module_idx = module_indices[child] 43 | for j in range(num_replicas): 44 | replica = module_copies[j][i] 45 | replica._modules[key] = module_copies[j][module_idx] 46 | for key, param in module._parameters.items(): 47 | if param is None: 48 | for j in range(num_replicas): 49 | replica = module_copies[j][i] 50 | replica._parameters[key] = None 51 | else: 52 | param_idx = param_indices[param] 53 | for j in range(num_replicas): 54 | replica = module_copies[j][i] 55 | replica._parameters[key] = param_copies[j][param_idx] 56 | for key, buf in module._buffers.items(): 57 | if buf is None: 58 | for j in range(num_replicas): 59 | replica = module_copies[j][i] 60 | replica._buffers[key] = None 61 | else: 62 | buffer_idx = buffer_indices[buf] 63 | for j in range(num_replicas): 64 | replica = module_copies[j][i] 65 | replica._buffers[key] = buffer_copies[j][buffer_idx] 66 | 67 | return [module_copies[j][0] for j in range(num_replicas)] 68 | -------------------------------------------------------------------------------- /lib/nn/parallel/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import re 3 | import numpy as np 4 | import torch 5 | from torch.autograd import Variable 6 | from ._functions import Scatter, Gather 7 | from torch._six 
import string_classes, int_classes 8 | from torch.utils.data.dataloader import numpy_type_map 9 | 10 | 11 | def scatter(inputs, target_gpus, dim=0): 12 | r""" 13 | Slices variables into approximately equal chunks and 14 | distributes them across given GPUs. Duplicates 15 | references to objects that are not variables. Does not 16 | support Tensors. 17 | """ 18 | def scatter_map(obj): 19 | if isinstance(obj, Variable): 20 | return Scatter.apply(target_gpus, None, dim, obj) 21 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 22 | if isinstance(obj, tuple) and len(obj) > 0: 23 | return list(zip(*map(scatter_map, obj))) 24 | if isinstance(obj, list) and len(obj) > 0: 25 | return list(map(list, zip(*map(scatter_map, obj)))) 26 | if isinstance(obj, dict) and len(obj) > 0: 27 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 28 | return [obj for targets in target_gpus] 29 | 30 | # After scatter_map is called, a scatter_map cell will exist. This cell 31 | # has a reference to the actual function scatter_map, which has references 32 | # to a closure that has a reference to the scatter_map cell (because the 33 | # fn is recursive). To avoid this reference cycle, we set the function to 34 | # None, clearing the cell 35 | try: 36 | return scatter_map(inputs) 37 | finally: 38 | scatter_map = None 39 | 40 | 41 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0): 42 | r"""Scatter with support for kwargs dictionary""" 43 | inputs = scatter(inputs, target_gpus, dim) if inputs else [] 44 | kwargs = scatter(kwargs, target_gpus, dim) if kwargs else [] 45 | if len(inputs) < len(kwargs): 46 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 47 | elif len(kwargs) < len(inputs): 48 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 49 | inputs = tuple(inputs) 50 | kwargs = tuple(kwargs) 51 | return inputs, kwargs 52 | 53 | 54 | def gather(outputs, target_device, dim=0): 55 | r""" 56 | Gathers variables from different GPUs on a specified device 57 | (-1 means the CPU). 58 | """ 59 | error_msg = "outputs must contain tensors, numbers, dicts or lists; found {}" 60 | 61 | def gather_map(outputs): 62 | out = outputs[0] 63 | elem_type = type(out) 64 | if isinstance(out, Variable): 65 | return Gather.apply(target_device, dim, *outputs) 66 | if out is None: 67 | return None 68 | if isinstance(out, collections.Sequence): 69 | return type(out)(map(gather_map, zip(*outputs))) 70 | elif isinstance(out, collections.Mapping): 71 | return {key: gather_map([d[key] for d in outputs]) for key in out} 72 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ 73 | and elem_type.__name__ != 'string_': 74 | elem = out 75 | if elem_type.__name__ == 'ndarray': 76 | # array of string classes and object 77 | if re.search('[SaUO]', elem.dtype.str) is not None: 78 | raise TypeError(error_msg.format(elem.dtype)) 79 | 80 | return Variable(torch.from_numpy(np.concatenate(outputs, dim))) 81 | if elem.shape == (): # scalars 82 | py_type = float if elem.dtype.name.startswith('float') else int 83 | return Variable(numpy_type_map[elem.dtype.name](list(map(py_type, outputs)))) 84 | elif isinstance(out, int_classes): 85 | return Variable(torch.LongTensor(outputs)) 86 | elif isinstance(out, float): 87 | return Variable(torch.DoubleTensor(outputs)) 88 | elif isinstance(out, string_classes): 89 | return outputs 90 | 91 | raise TypeError((error_msg.format(elem_type))) 92 | 93 | # Recursive function calls like this create reference cycles. 
94 | # Setting the function to None clears the refcycle. 95 | try: 96 | return gather_map(outputs) 97 | finally: 98 | gather_map = None 99 | -------------------------------------------------------------------------------- /lib/roi_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/roi_data/__init__.py -------------------------------------------------------------------------------- /lib/roi_data/data_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Common utility functions for RPN and RetinaNet minibtach blobs preparation. 17 | """ 18 | 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | from __future__ import unicode_literals 23 | 24 | from collections import namedtuple 25 | import logging 26 | import numpy as np 27 | import threading 28 | 29 | from core.config import cfg 30 | from modeling.generate_anchors import generate_anchors 31 | import utils.boxes as box_utils 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | # octave and aspect fields are only used on RetinaNet. 
Octave corresponds to the 37 | # scale of the anchor and aspect denotes which aspect ratio is used in the range 38 | # of aspect ratios 39 | FieldOfAnchors = namedtuple( 40 | 'FieldOfAnchors', [ 41 | 'field_of_anchors', 'num_cell_anchors', 'stride', 'field_size', 42 | 'octave', 'aspect' 43 | ] 44 | ) 45 | 46 | # Cache for memoizing _get_field_of_anchors 47 | _threadlocal_foa = threading.local() 48 | 49 | 50 | def get_field_of_anchors( 51 | stride, anchor_sizes, anchor_aspect_ratios, octave=None, aspect=None 52 | ): 53 | global _threadlocal_foa 54 | if not hasattr(_threadlocal_foa, 'cache'): 55 | _threadlocal_foa.cache = {} 56 | 57 | cache_key = str(stride) + str(anchor_sizes) + str(anchor_aspect_ratios) 58 | if cache_key in _threadlocal_foa.cache: 59 | return _threadlocal_foa.cache[cache_key] 60 | 61 | # Anchors at a single feature cell 62 | cell_anchors = generate_anchors( 63 | stride=stride, sizes=anchor_sizes, aspect_ratios=anchor_aspect_ratios 64 | ) 65 | num_cell_anchors = cell_anchors.shape[0] 66 | 67 | # Generate canonical proposals from shifted anchors 68 | # Enumerate all shifted positions on the (H, W) grid 69 | fpn_max_size = cfg.FPN.COARSEST_STRIDE * np.ceil( 70 | cfg.TRAIN.MAX_SIZE / float(cfg.FPN.COARSEST_STRIDE) 71 | ) 72 | field_size = int(np.ceil(fpn_max_size / float(stride))) 73 | shifts = np.arange(0, field_size) * stride 74 | shift_x, shift_y = np.meshgrid(shifts, shifts) 75 | shift_x = shift_x.ravel() 76 | shift_y = shift_y.ravel() 77 | shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose() 78 | 79 | # Broacast anchors over shifts to enumerate all anchors at all positions 80 | # in the (H, W) grid: 81 | # - add A cell anchors of shape (1, A, 4) to 82 | # - K shifts of shape (K, 1, 4) to get 83 | # - all shifted anchors of shape (K, A, 4) 84 | # - reshape to (K*A, 4) shifted anchors 85 | A = num_cell_anchors 86 | K = shifts.shape[0] 87 | field_of_anchors = ( 88 | cell_anchors.reshape((1, A, 4)) + 89 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 90 | ) 91 | field_of_anchors = field_of_anchors.reshape((K * A, 4)) 92 | foa = FieldOfAnchors( 93 | field_of_anchors=field_of_anchors.astype(np.float32), 94 | num_cell_anchors=num_cell_anchors, 95 | stride=stride, 96 | field_size=field_size, 97 | octave=octave, 98 | aspect=aspect 99 | ) 100 | _threadlocal_foa.cache[cache_key] = foa 101 | return foa 102 | 103 | 104 | def unmap(data, count, inds, fill=0): 105 | """Unmap a subset of item (data) back to the original set of items (of 106 | size count)""" 107 | if count == len(inds): 108 | return data 109 | 110 | if len(data.shape) == 1: 111 | ret = np.empty((count, ), dtype=data.dtype) 112 | ret.fill(fill) 113 | ret[inds] = data 114 | else: 115 | ret = np.empty((count, ) + data.shape[1:], dtype=data.dtype) 116 | ret.fill(fill) 117 | ret[inds, :] = data 118 | return ret 119 | 120 | 121 | def compute_targets(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)): 122 | """Compute bounding-box regression targets for an image.""" 123 | return box_utils.bbox_transform_inv(ex_rois, gt_rois, weights).astype( 124 | np.float32, copy=False 125 | ) 126 | -------------------------------------------------------------------------------- /lib/roi_data/keypoint_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | """Construct minibatches for Mask R-CNN training when keypoints are enabled. 16 | Handles the minibatch blobs that are specific to training Mask R-CNN for 17 | keypoint detection. Other blobs that are generic to RPN or Fast/er R-CNN are 18 | handled by their respecitive roi_data modules. 19 | """ 20 | 21 | from __future__ import absolute_import 22 | from __future__ import division 23 | from __future__ import print_function 24 | from __future__ import unicode_literals 25 | 26 | import numpy as np 27 | 28 | from core.config import cfg 29 | import utils.blob as blob_utils 30 | import utils.keypoints as keypoint_utils 31 | 32 | 33 | def add_keypoint_rcnn_blobs(blobs, roidb, fg_rois_per_image, fg_inds, im_scale, 34 | batch_idx): 35 | """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary.""" 36 | # Note: gt_inds must match how they're computed in 37 | # datasets.json_dataset._merge_proposal_boxes_into_roidb 38 | gt_inds = np.where(roidb['gt_classes'] > 0)[0] 39 | max_overlaps = roidb['max_overlaps'] 40 | gt_keypoints = roidb['gt_keypoints'] 41 | 42 | ind_kp = gt_inds[roidb['box_to_gt_ind_map']] 43 | within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes']) 44 | vis_kp = gt_keypoints[ind_kp, 2, :] > 0 45 | is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0 46 | kp_fg_inds = np.where( 47 | np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible))[0] 48 | 49 | kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size) 50 | if kp_fg_inds.size > kp_fg_rois_per_this_image: 51 | kp_fg_inds = np.random.choice( 52 | kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False) 53 | 54 | sampled_fg_rois = roidb['boxes'][kp_fg_inds] 55 | box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds] 56 | 57 | num_keypoints = gt_keypoints.shape[2] 58 | sampled_keypoints = -np.ones( 59 | (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints), 60 | dtype=gt_keypoints.dtype) 61 | for ii in range(len(sampled_fg_rois)): 62 | ind = box_to_gt_ind_map[ii] 63 | if ind >= 0: 64 | sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :] 65 | assert np.sum(sampled_keypoints[ii, 2, :]) > 0 66 | 67 | heats, weights = keypoint_utils.keypoints_to_heatmap_labels( 68 | sampled_keypoints, sampled_fg_rois) 69 | 70 | shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 1) 71 | heats = heats.reshape(shape) 72 | weights = weights.reshape(shape) 73 | 74 | sampled_fg_rois *= im_scale 75 | repeated_batch_idx = batch_idx * blob_utils.ones((sampled_fg_rois.shape[0], 76 | 1)) 77 | sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois)) 78 | 79 | blobs['keypoint_rois'] = sampled_fg_rois 80 | blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False) 81 | blobs['keypoint_weights'] = weights 82 | 83 | 84 | def finalize_keypoint_minibatch(blobs, valid): 85 | """Finalize the minibatch after blobs for all minibatch images have been 86 | collated. 
87 | """ 88 | min_count = cfg.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH 89 | num_visible_keypoints = np.sum(blobs['keypoint_weights']) 90 | valid = (valid and len(blobs['keypoint_weights']) > 0 91 | and num_visible_keypoints > min_count) 92 | # Normalizer to use if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False. 93 | # See modeling.model_builder.add_keypoint_losses 94 | norm = num_visible_keypoints / ( 95 | cfg.TRAIN.IMS_PER_BATCH * cfg.TRAIN.BATCH_SIZE_PER_IM * cfg.TRAIN. 96 | FG_FRACTION * cfg.KRCNN.NUM_KEYPOINTS) 97 | blobs['keypoint_loss_normalizer'] = np.array(norm, dtype=np.float32) 98 | return valid 99 | 100 | 101 | def _within_box(points, boxes): 102 | """Validate which keypoints are contained inside a given box. 103 | 104 | points: Nx2xK 105 | boxes: Nx4 106 | output: NxK 107 | """ 108 | x_within = np.logical_and( 109 | points[:, 0, :] >= np.expand_dims(boxes[:, 0], axis=1), 110 | points[:, 0, :] <= np.expand_dims(boxes[:, 2], axis=1)) 111 | y_within = np.logical_and( 112 | points[:, 1, :] >= np.expand_dims(boxes[:, 1], axis=1), 113 | points[:, 1, :] <= np.expand_dims(boxes[:, 3], axis=1)) 114 | return np.logical_and(x_within, y_within) 115 | -------------------------------------------------------------------------------- /lib/roi_data/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | """Construct minibatches for Mask R-CNN training. Handles the minibatch blobs 16 | that are specific to Mask R-CNN. Other blobs that are generic to RPN or 17 | Fast/er R-CNN are handled by their respecitive roi_data modules. 18 | """ 19 | 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | from __future__ import unicode_literals 24 | 25 | import logging 26 | import numpy as np 27 | 28 | from core.config import cfg 29 | import utils.blob as blob_utils 30 | import utils.boxes as box_utils 31 | import utils.segms as segm_utils 32 | 33 | 34 | def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx): 35 | """Add Mask R-CNN specific blobs to the input blob dictionary.""" 36 | # Prepare the mask targets by associating one gt mask to each training roi 37 | # that has a fg (non-bg) class label. 
38 | M = cfg.MRCNN.RESOLUTION 39 | polys_gt_inds = np.where((roidb['gt_classes'] > 0) & 40 | (roidb['is_crowd'] == 0))[0] 41 | polys_gt = [roidb['segms'][i] for i in polys_gt_inds] 42 | boxes_from_polys = segm_utils.polys_to_boxes(polys_gt) 43 | # boxes_from_polys = [roidb['boxes'][i] for i in polys_gt_inds] 44 | fg_inds = np.where(blobs['labels_int32'] > 0)[0] 45 | roi_has_mask = blobs['labels_int32'].copy() 46 | roi_has_mask[roi_has_mask > 0] = 1 47 | 48 | if fg_inds.shape[0] > 0: 49 | # Class labels for the foreground rois 50 | mask_class_labels = blobs['labels_int32'][fg_inds] 51 | masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True) 52 | 53 | # Find overlap between all foreground rois and the bounding boxes 54 | # enclosing each segmentation 55 | rois_fg = sampled_boxes[fg_inds] 56 | overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( 57 | rois_fg.astype(np.float32, copy=False), 58 | boxes_from_polys.astype(np.float32, copy=False)) 59 | # Map from each fg rois to the index of the mask with highest overlap 60 | # (measured by bbox overlap) 61 | fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) 62 | 63 | # add fg targets 64 | for i in range(rois_fg.shape[0]): 65 | fg_polys_ind = fg_polys_inds[i] 66 | poly_gt = polys_gt[fg_polys_ind] 67 | roi_fg = rois_fg[i] 68 | # Rasterize the portion of the polygon mask within the given fg roi 69 | # to an M x M binary image 70 | mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M) 71 | mask = np.array(mask > 0, dtype=np.int32) # Ensure it's binary 72 | masks[i, :] = np.reshape(mask, M**2) 73 | else: # If there are no fg masks (it does happen) 74 | # The network cannot handle empty blobs, so we must provide a mask 75 | # We simply take the first bg roi, given it an all -1's mask (ignore 76 | # label), and label it with class zero (bg). 77 | bg_inds = np.where(blobs['labels_int32'] == 0)[0] 78 | # rois_fg is actually one background roi, but that's ok because ... 79 | rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)) 80 | # We give it an -1's blob (ignore label) 81 | masks = -blob_utils.ones((1, M**2), int32=True) 82 | # We label it with class = 0 (background) 83 | mask_class_labels = blob_utils.zeros((1, )) 84 | # Mark that the first roi has a mask 85 | roi_has_mask[0] = 1 86 | 87 | if cfg.MRCNN.CLS_SPECIFIC_MASK: 88 | masks = _expand_to_class_specific_mask_targets(masks, 89 | mask_class_labels) 90 | 91 | # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2) 92 | rois_fg *= im_scale 93 | repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1)) 94 | rois_fg = np.hstack((repeated_batch_idx, rois_fg)) 95 | 96 | # Update blobs dict with Mask R-CNN blobs 97 | blobs['mask_rois'] = rois_fg 98 | blobs['roi_has_mask_int32'] = roi_has_mask 99 | blobs['masks_int32'] = masks 100 | 101 | 102 | def _expand_to_class_specific_mask_targets(masks, mask_class_labels): 103 | """Expand masks from shape (#masks, M ** 2) to (#masks, #classes * M ** 2) 104 | to encode class specific mask targets. 
105 | """ 106 | assert masks.shape[0] == mask_class_labels.shape[0] 107 | M = cfg.MRCNN.RESOLUTION 108 | 109 | # Target values of -1 are "don't care" / ignore labels 110 | mask_targets = -blob_utils.ones( 111 | (masks.shape[0], cfg.MODEL.NUM_CLASSES * M**2), int32=True) 112 | 113 | for i in range(masks.shape[0]): 114 | cls = int(mask_class_labels[i]) 115 | start = M**2 * cls 116 | end = start + M**2 117 | # Ignore background instance 118 | # (only happens when there is no fg samples in an image) 119 | if cls > 0: 120 | mask_targets[i, start:end] = masks[i, :] 121 | 122 | return mask_targets 123 | -------------------------------------------------------------------------------- /lib/roi_data/minibatch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | from core.config import cfg 5 | import utils.blob as blob_utils 6 | import roi_data.rpn 7 | 8 | 9 | def get_minibatch_blob_names(is_training=True): 10 | """Return blob names in the order in which they are read by the data loader. 11 | """ 12 | # data blob: holds a batch of N images, each with 3 channels 13 | blob_names = ['data'] 14 | if cfg.RPN.RPN_ON: 15 | # RPN-only or end-to-end Faster R-CNN 16 | blob_names += roi_data.rpn.get_rpn_blob_names(is_training=is_training) 17 | elif cfg.RETINANET.RETINANET_ON: 18 | raise NotImplementedError 19 | else: 20 | # Fast R-CNN like models trained on precomputed proposals 21 | blob_names += roi_data.fast_rcnn.get_fast_rcnn_blob_names( 22 | is_training=is_training 23 | ) 24 | return blob_names 25 | 26 | 27 | def get_minibatch(roidb): 28 | """Given a roidb, construct a minibatch sampled from it.""" 29 | # We collect blobs from each image onto a list and then concat them into a 30 | # single tensor, hence we initialize each blob to an empty list 31 | blobs = {k: [] for k in get_minibatch_blob_names()} 32 | 33 | # Get the input image blob 34 | im_blob, im_scales = _get_image_blob(roidb) 35 | blobs['data'] = im_blob 36 | if cfg.RPN.RPN_ON: 37 | # RPN-only or end-to-end Faster/Mask R-CNN 38 | valid = roi_data.rpn.add_rpn_blobs(blobs, im_scales, roidb) 39 | elif cfg.RETINANET.RETINANET_ON: 40 | raise NotImplementedError 41 | else: 42 | # Fast R-CNN like models trained on precomputed proposals 43 | valid = roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb) 44 | return blobs, valid 45 | 46 | 47 | def _get_image_blob(roidb): 48 | """Builds an input blob from the images in the roidb at the specified 49 | scales. 
50 | """ 51 | num_images = len(roidb) 52 | # Sample random scales to use for each image in this batch 53 | scale_inds = np.random.randint( 54 | 0, high=len(cfg.TRAIN.SCALES), size=num_images) 55 | processed_ims = [] 56 | im_scales = [] 57 | for i in range(num_images): 58 | im = cv2.imread(roidb[i]['image']) 59 | assert im is not None, \ 60 | 'Failed to read image \'{}\''.format(roidb[i]['image']) 61 | # If NOT using opencv to read in images, uncomment following lines 62 | # if len(im.shape) == 2: 63 | # im = im[:, :, np.newaxis] 64 | # im = np.concatenate((im, im, im), axis=2) 65 | # # flip the channel, since the original one using cv2 66 | # # rgb -> bgr 67 | # im = im[:, :, ::-1] 68 | if roidb[i]['flipped']: 69 | im = im[:, ::-1, :] 70 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 71 | im, im_scale = blob_utils.prep_im_for_blob( 72 | im, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE) 73 | im_scales.append(im_scale[0]) 74 | processed_ims.append(im[0]) 75 | 76 | # Create a blob to hold the input images [n, c, h, w] 77 | blob = blob_utils.im_list_to_blob(processed_ims) 78 | 79 | return blob, im_scales 80 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | import os 10 | from os.path import join as pjoin 11 | import numpy as np 12 | from distutils.core import setup 13 | from distutils.extension import Extension 14 | from Cython.Distutils import build_ext 15 | 16 | 17 | def find_in_path(name, path): 18 | "Find a file in a search path" 19 | # adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | # def locate_cuda(): 28 | # """Locate the CUDA environment on the system 29 | # 30 | # Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 31 | # and values giving the absolute path to each directory. 32 | # 33 | # Starts by looking for the CUDAHOME env variable. If not found, everything 34 | # is based on finding 'nvcc' in the PATH. 35 | # """ 36 | # 37 | # # first check if the CUDAHOME env variable is in use 38 | # if 'CUDAHOME' in os.environ: 39 | # home = os.environ['CUDAHOME'] 40 | # nvcc = pjoin(home, 'bin', 'nvcc') 41 | # else: 42 | # # otherwise, search the PATH for NVCC 43 | # default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 44 | # nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 45 | # if nvcc is None: 46 | # raise EnvironmentError('The nvcc binary could not be ' 47 | # 'located in your $PATH. 
Either add it to your path, or set $CUDAHOME') 48 | # home = os.path.dirname(os.path.dirname(nvcc)) 49 | # 50 | # cudaconfig = {'home': home, 'nvcc': nvcc, 51 | # 'include': pjoin(home, 'include'), 52 | # 'lib64': pjoin(home, 'lib64')} 53 | # for k, v in cudaconfig.iteritems(): 54 | # if not os.path.exists(v): 55 | # raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 56 | # 57 | # return cudaconfig 58 | 59 | 60 | # CUDA = locate_cuda() 61 | 62 | # Obtain the numpy include directory. This logic works across numpy versions. 63 | try: 64 | numpy_include = np.get_include() 65 | except AttributeError: 66 | numpy_include = np.get_numpy_include() 67 | 68 | 69 | def customize_compiler_for_nvcc(self): 70 | """inject deep into distutils to customize how the dispatch 71 | to gcc/nvcc works. 72 | 73 | If you subclass UnixCCompiler, it's not trivial to get your subclass 74 | injected in, and still have the right customizations (i.e. 75 | distutils.sysconfig.customize_compiler) run on it. So instead of going 76 | the OO route, I have this. Note, it's kindof like a wierd functional 77 | subclassing going on.""" 78 | 79 | # tell the compiler it can processes .cu 80 | self.src_extensions.append('.cu') 81 | 82 | # save references to the default compiler_so and _comple methods 83 | default_compiler_so = self.compiler_so 84 | super = self._compile 85 | 86 | # now redefine the _compile method. This gets executed for each 87 | # object but distutils doesn't have the ability to change compilers 88 | # based on source extension: we add it. 89 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 90 | print(extra_postargs) 91 | if os.path.splitext(src)[1] == '.cu': 92 | # use the cuda for .cu files 93 | self.set_executable('compiler_so', CUDA['nvcc']) 94 | # use only a subset of the extra_postargs, which are 1-1 translated 95 | # from the extra_compile_args in the Extension class 96 | postargs = extra_postargs['nvcc'] 97 | else: 98 | postargs = extra_postargs['gcc'] 99 | 100 | super(obj, src, ext, cc_args, postargs, pp_opts) 101 | # reset the default compiler_so, which we might have changed for cuda 102 | self.compiler_so = default_compiler_so 103 | 104 | # inject our redefined _compile method into the class 105 | self._compile = _compile 106 | 107 | 108 | # run the customize_compiler 109 | class custom_build_ext(build_ext): 110 | def build_extensions(self): 111 | customize_compiler_for_nvcc(self.compiler) 112 | build_ext.build_extensions(self) 113 | 114 | 115 | ext_modules = [ 116 | Extension( 117 | name='utils.cython_bbox', 118 | sources=['utils/cython_bbox.pyx'], 119 | extra_compile_args={'gcc': ['-Wno-cpp']}, 120 | include_dirs=[numpy_include] 121 | ), 122 | Extension( 123 | name='utils.cython_nms', 124 | sources=['utils/cython_nms.pyx'], 125 | extra_compile_args={'gcc': ['-Wno-cpp']}, 126 | include_dirs=[numpy_include] 127 | ) 128 | ] 129 | 130 | setup( 131 | name='mask_rcnn', 132 | ext_modules=ext_modules, 133 | # inject our custom trigger 134 | cmdclass={'build_ext': custom_build_ext}, 135 | ) 136 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wkentaro/mask-rcnn.pytorch/b7686fa15b1565c01276bbb0ca3f22fca778701d/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/blob.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Ross Girshick 22 | # -------------------------------------------------------- 23 | """blob helper functions.""" 24 | 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | from __future__ import unicode_literals 29 | 30 | from six.moves import cPickle as pickle 31 | import numpy as np 32 | import cv2 33 | 34 | from core.config import cfg 35 | 36 | 37 | def get_image_blob(im, target_scale, target_max_size): 38 | """Convert an image into a network input. 39 | 40 | Arguments: 41 | im (ndarray): a color image in BGR order 42 | 43 | Returns: 44 | blob (ndarray): a data blob holding an image pyramid 45 | im_scale (float): image scale (target size) / (original size) 46 | im_info (ndarray) 47 | """ 48 | processed_im, im_scale = prep_im_for_blob( 49 | im, cfg.PIXEL_MEANS, [target_scale], target_max_size 50 | ) 51 | blob = im_list_to_blob(processed_im) 52 | # NOTE: this height and width may be larger than actual scaled input image 53 | # due to the FPN.COARSEST_STRIDE related padding in im_list_to_blob. We are 54 | # maintaining this behavior for now to make existing results exactly 55 | # reproducible (in practice using the true input image height and width 56 | # yields nearly the same results, but they are sometimes slightly different 57 | # because predictions near the edge of the image will be pruned more 58 | # aggressively). 59 | height, width = blob.shape[2], blob.shape[3] 60 | im_info = np.hstack((height, width, im_scale))[np.newaxis, :] 61 | return blob, im_scale, im_info.astype(np.float32) 62 | 63 | 64 | def im_list_to_blob(ims): 65 | """Convert a list of images into a network input. Assumes images were 66 | prepared using prep_im_for_blob or equivalent: i.e. 67 | - BGR channel order 68 | - pixel means subtracted 69 | - resized to the desired input size 70 | - float32 numpy ndarray format 71 | Output is a 4D HCHW tensor of the images concatenated along axis 0 with 72 | shape. 
73 | """ 74 | if not isinstance(ims, list): 75 | ims = [ims] 76 | max_shape = get_max_shape([im.shape[:2] for im in ims]) 77 | 78 | num_images = len(ims) 79 | blob = np.zeros( 80 | (num_images, max_shape[0], max_shape[1], 3), dtype=np.float32) 81 | for i in range(num_images): 82 | im = ims[i] 83 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 84 | # Move channels (axis 3) to axis 1 85 | # Axis order will become: (batch elem, channel, height, width) 86 | channel_swap = (0, 3, 1, 2) 87 | blob = blob.transpose(channel_swap) 88 | return blob 89 | 90 | 91 | def get_max_shape(im_shapes): 92 | """Calculate max spatial size (h, w) for batching given a list of image shapes 93 | """ 94 | max_shape = np.array(im_shapes).max(axis=0) 95 | assert max_shape.size == 2 96 | # Pad the image so they can be divisible by a stride 97 | if cfg.FPN.FPN_ON: 98 | stride = float(cfg.FPN.COARSEST_STRIDE) 99 | max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride) 100 | max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride) 101 | return max_shape 102 | 103 | 104 | def prep_im_for_blob(im, pixel_means, target_sizes, max_size): 105 | """Prepare an image for use as a network input blob. Specially: 106 | - Subtract per-channel pixel mean 107 | - Convert to float32 108 | - Rescale to each of the specified target size (capped at max_size) 109 | Returns a list of transformed images, one for each target size. Also returns 110 | the scale factors that were used to compute each returned image. 111 | """ 112 | im = im.astype(np.float32, copy=False) 113 | im -= pixel_means 114 | im_shape = im.shape 115 | im_size_min = np.min(im_shape[0:2]) 116 | im_size_max = np.max(im_shape[0:2]) 117 | 118 | ims = [] 119 | im_scales = [] 120 | for target_size in target_sizes: 121 | im_scale = get_target_scale(im_size_min, im_size_max, target_size, max_size) 122 | im_resized = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 123 | interpolation=cv2.INTER_LINEAR) 124 | ims.append(im_resized) 125 | im_scales.append(im_scale) 126 | return ims, im_scales 127 | 128 | 129 | def get_im_blob_sizes(im_shape, target_sizes, max_size): 130 | """Calculate im blob size for multiple target_sizes given original im shape 131 | """ 132 | im_size_min = np.min(im_shape) 133 | im_size_max = np.max(im_shape) 134 | im_sizes = [] 135 | for target_size in target_sizes: 136 | im_scale = get_target_scale(im_size_min, im_size_max, target_size, max_size) 137 | im_sizes.append(np.round(im_shape * im_scale)) 138 | return np.array(im_sizes) 139 | 140 | 141 | def get_target_scale(im_size_min, im_size_max, target_size, max_size): 142 | """Calculate target resize scale 143 | """ 144 | im_scale = float(target_size) / float(im_size_min) 145 | # Prevent the biggest axis from being more than max_size 146 | if np.round(im_scale * im_size_max) > max_size: 147 | im_scale = float(max_size) / float(im_size_max) 148 | return im_scale 149 | 150 | 151 | def zeros(shape, int32=False): 152 | """Return a blob of all zeros of the given shape with the correct float or 153 | int data type. 154 | """ 155 | return np.zeros(shape, dtype=np.int32 if int32 else np.float32) 156 | 157 | 158 | def ones(shape, int32=False): 159 | """Return a blob of all ones of the given shape with the correct float or 160 | int data type. 161 | """ 162 | return np.ones(shape, dtype=np.int32 if int32 else np.float32) 163 | 164 | 165 | def serialize(obj): 166 | """Serialize a Python object using pickle and encode it as an array of 167 | float32 values so that it can be feed into the workspace. 
See deserialize(). 168 | """ 169 | return np.fromstring(pickle.dumps(obj), dtype=np.uint8).astype(np.float32) 170 | 171 | 172 | def deserialize(arr): 173 | """Unserialize a Python object from an array of float32 values fetched from 174 | a workspace. See serialize(). 175 | """ 176 | return pickle.loads(arr.astype(np.uint8).tobytes()) 177 | -------------------------------------------------------------------------------- /lib/utils/collections.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """A simple attribute dictionary used for representing configuration options.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | 24 | class AttrDict(dict): 25 | 26 | IMMUTABLE = '__immutable__' 27 | 28 | def __init__(self, *args, **kwargs): 29 | super(AttrDict, self).__init__(*args, **kwargs) 30 | self.__dict__[AttrDict.IMMUTABLE] = False 31 | 32 | def __getattr__(self, name): 33 | if name in self.__dict__: 34 | return self.__dict__[name] 35 | elif name in self: 36 | return self[name] 37 | else: 38 | raise AttributeError(name) 39 | 40 | def __setattr__(self, name, value): 41 | if not self.__dict__[AttrDict.IMMUTABLE]: 42 | if name in self.__dict__: 43 | self.__dict__[name] = value 44 | else: 45 | self[name] = value 46 | else: 47 | raise AttributeError( 48 | 'Attempted to set "{}" to "{}", but AttrDict is immutable'. 49 | format(name, value) 50 | ) 51 | 52 | def immutable(self, is_immutable): 53 | """Set immutability to is_immutable and recursively apply the setting 54 | to all nested AttrDicts. 55 | """ 56 | self.__dict__[AttrDict.IMMUTABLE] = is_immutable 57 | # Recursively set immutable state 58 | for v in self.__dict__.values(): 59 | if isinstance(v, AttrDict): 60 | v.immutable(is_immutable) 61 | for v in self.values(): 62 | if isinstance(v, AttrDict): 63 | v.immutable(is_immutable) 64 | 65 | def is_immutable(self): 66 | return self.__dict__[AttrDict.IMMUTABLE] 67 | -------------------------------------------------------------------------------- /lib/utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """An awesome colormap for really neat visualizations.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | import numpy as np 24 | 25 | 26 | def colormap(rgb=False): 27 | color_list = np.array( 28 | [ 29 | 0.000, 0.447, 0.741, 30 | 0.850, 0.325, 0.098, 31 | 0.929, 0.694, 0.125, 32 | 0.494, 0.184, 0.556, 33 | 0.466, 0.674, 0.188, 34 | 0.301, 0.745, 0.933, 35 | 0.635, 0.078, 0.184, 36 | 0.300, 0.300, 0.300, 37 | 0.600, 0.600, 0.600, 38 | 1.000, 0.000, 0.000, 39 | 1.000, 0.500, 0.000, 40 | 0.749, 0.749, 0.000, 41 | 0.000, 1.000, 0.000, 42 | 0.000, 0.000, 1.000, 43 | 0.667, 0.000, 1.000, 44 | 0.333, 0.333, 0.000, 45 | 0.333, 0.667, 0.000, 46 | 0.333, 1.000, 0.000, 47 | 0.667, 0.333, 0.000, 48 | 0.667, 0.667, 0.000, 49 | 0.667, 1.000, 0.000, 50 | 1.000, 0.333, 0.000, 51 | 1.000, 0.667, 0.000, 52 | 1.000, 1.000, 0.000, 53 | 0.000, 0.333, 0.500, 54 | 0.000, 0.667, 0.500, 55 | 0.000, 1.000, 0.500, 56 | 0.333, 0.000, 0.500, 57 | 0.333, 0.333, 0.500, 58 | 0.333, 0.667, 0.500, 59 | 0.333, 1.000, 0.500, 60 | 0.667, 0.000, 0.500, 61 | 0.667, 0.333, 0.500, 62 | 0.667, 0.667, 0.500, 63 | 0.667, 1.000, 0.500, 64 | 1.000, 0.000, 0.500, 65 | 1.000, 0.333, 0.500, 66 | 1.000, 0.667, 0.500, 67 | 1.000, 1.000, 0.500, 68 | 0.000, 0.333, 1.000, 69 | 0.000, 0.667, 1.000, 70 | 0.000, 1.000, 1.000, 71 | 0.333, 0.000, 1.000, 72 | 0.333, 0.333, 1.000, 73 | 0.333, 0.667, 1.000, 74 | 0.333, 1.000, 1.000, 75 | 0.667, 0.000, 1.000, 76 | 0.667, 0.333, 1.000, 77 | 0.667, 0.667, 1.000, 78 | 0.667, 1.000, 1.000, 79 | 1.000, 0.000, 1.000, 80 | 1.000, 0.333, 1.000, 81 | 1.000, 0.667, 1.000, 82 | 0.167, 0.000, 0.000, 83 | 0.333, 0.000, 0.000, 84 | 0.500, 0.000, 0.000, 85 | 0.667, 0.000, 0.000, 86 | 0.833, 0.000, 0.000, 87 | 1.000, 0.000, 0.000, 88 | 0.000, 0.167, 0.000, 89 | 0.000, 0.333, 0.000, 90 | 0.000, 0.500, 0.000, 91 | 0.000, 0.667, 0.000, 92 | 0.000, 0.833, 0.000, 93 | 0.000, 1.000, 0.000, 94 | 0.000, 0.000, 0.167, 95 | 0.000, 0.000, 0.333, 96 | 0.000, 0.000, 0.500, 97 | 0.000, 0.000, 0.667, 98 | 0.000, 0.000, 0.833, 99 | 0.000, 0.000, 1.000, 100 | 0.000, 0.000, 0.000, 101 | 0.143, 0.143, 0.143, 102 | 0.286, 0.286, 0.286, 103 | 0.429, 0.429, 0.429, 104 | 0.571, 0.571, 0.571, 105 | 0.714, 0.714, 0.714, 106 | 0.857, 0.857, 0.857, 107 | 1.000, 1.000, 1.000 108 | ] 109 | ).astype(np.float32) 110 | color_list = color_list.reshape((-1, 3)) * 255 111 | if not rgb: 112 | color_list = color_list[:, ::-1] 113 | return color_list 114 | -------------------------------------------------------------------------------- /lib/utils/cython_bbox.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_overlaps( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ua = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) + 70 | box_area - iw * ih 71 | ) 72 | overlaps[n, k] = iw * ih / ua 73 | return overlaps 74 | -------------------------------------------------------------------------------- /lib/utils/detectron_weight_helper.py: -------------------------------------------------------------------------------- 1 | """Helper functions for loading pretrained weights from Detectron pickle files 2 | """ 3 | 4 | import pickle 5 | import re 6 | import torch 7 | 8 | 9 | def load_detectron_weight(net, detectron_weight_file): 10 | name_mapping, orphan_in_detectron = net.detectron_weight_mapping 11 | 12 | with open(detectron_weight_file, 'rb') as fp: 13 | src_blobs = pickle.load(fp, encoding='latin1') 14 | if 'blobs' in src_blobs: 15 | src_blobs = src_blobs['blobs'] 16 | 17 | params = net.state_dict() 18 | for p_name, p_tensor in params.items(): 19 | d_name = name_mapping[p_name] 20 | if d_name: # if not None of 0 21 | p_tensor.copy_(torch.Tensor(src_blobs[d_name])) 22 | 23 | 24 | def resnet_weights_name_pattern(): 25 | pattern = re.compile(r"conv1_w|res_conv1_.+|res\d_\d_.+") 26 | return pattern 27 | 28 | 29 | if __name__ == '__main__': 30 | """Testing""" 31 | from pprint import pprint 32 | import sys 33 | sys.path.insert(0, '..') 34 | from modeling.model_builder import Generalized_RCNN 35 | from core.config import cfg, cfg_from_file 36 | 37 | cfg.MODEL.NUM_CLASSES = 81 38 | cfg_from_file('../../cfgs/res50_mask.yml') 39 | net = Generalized_RCNN() 40 | 41 | # pprint(list(net.state_dict().keys()), width=1) 42 | 43 | mapping, orphans = net.detectron_weight_mapping 44 | state_dict = net.state_dict() 45 | 46 | for k in mapping.keys(): 47 | assert k in state_dict, '%s' % k 48 | 49 | rest = set(state_dict.keys()) - set(mapping.keys()) 50 | assert len(rest) == 0 51 
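A minimal sketch of how load_detectron_weight above is typically driven, mirroring its use in tools/infer_simple.py at the end of this dump; the config and weight-file paths are placeholders, not files shipped in this listing.

    from core.config import cfg, cfg_from_file, assert_and_infer_cfg
    from modeling.model_builder import Generalized_RCNN
    from utils.detectron_weight_helper import load_detectron_weight

    cfg.MODEL.NUM_CLASSES = 81                      # COCO: 80 classes + background
    cfg_from_file('configs/your_mask_rcnn_config.yaml')  # placeholder config path
    cfg.RESNETS.IMAGENET_PRETRAINED = False         # full weights are loaded below
    assert_and_infer_cfg()

    net = Generalized_RCNN()
    net.cuda()
    load_detectron_weight(net, '/path/to/detectron_model_final.pkl')  # placeholder
    net.eval()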
| -------------------------------------------------------------------------------- /lib/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Environment helper functions.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | import os 24 | import sys 25 | 26 | # Default value of the CMake install prefix 27 | _CMAKE_INSTALL_PREFIX = '/usr/local' 28 | 29 | 30 | def get_runtime_dir(): 31 | """Retrieve the path to the runtime directory.""" 32 | return sys.path[0] 33 | 34 | 35 | def get_py_bin_ext(): 36 | """Retrieve python binary extension.""" 37 | return '.py' 38 | 39 | 40 | def set_up_matplotlib(): 41 | """Set matplotlib up.""" 42 | import matplotlib 43 | # Use a non-interactive backend 44 | matplotlib.use('Agg') 45 | 46 | 47 | def exit_on_error(): 48 | """Exit from a detectron tool when there's an error.""" 49 | sys.exit(1) 50 | 51 | 52 | def import_nccl_ops(): 53 | """Import NCCL ops.""" 54 | # There is no need to load NCCL ops since the 55 | # NCCL dependency is built into the Caffe2 gpu lib 56 | pass 57 | 58 | 59 | def get_detectron_ops_lib(): 60 | """Retrieve Detectron ops library.""" 61 | # Candidate prefixes for the detectron ops lib path 62 | prefixes = [_CMAKE_INSTALL_PREFIX, sys.prefix, sys.exec_prefix] + sys.path 63 | # Search for detectron ops lib 64 | for prefix in prefixes: 65 | ops_path = os.path.join(prefix, 'lib/libcaffe2_detectron_ops_gpu.so') 66 | if os.path.exists(ops_path): 67 | # TODO(ilijar): Switch to using a logger 68 | print('Found Detectron ops lib: {}'.format(ops_path)) 69 | break 70 | assert os.path.exists(ops_path), \ 71 | ('Detectron ops lib not found; make sure that your Caffe2 ' 72 | 'version includes Detectron module') 73 | return ops_path 74 | 75 | 76 | def get_custom_ops_lib(): 77 | """Retrieve custom ops library.""" 78 | lib_dir, _utils = os.path.split(os.path.dirname(__file__)) 79 | custom_ops_lib = os.path.join( 80 | lib_dir, 'build/libcaffe2_detectron_custom_ops_gpu.so') 81 | assert os.path.exists(custom_ops_lib), \ 82 | 'Custom ops lib not found at \'{}\''.format(custom_ops_lib) 83 | return custom_ops_lib 84 | -------------------------------------------------------------------------------- /lib/utils/fpn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import utils.boxes as box_utils 4 | from core.config import cfg 5 | 6 | 7 | # ---------------------------------------------------------------------------- # 8 | # Helper functions for working with multilevel FPN RoIs 9 | # ---------------------------------------------------------------------------- # 10 | 11 | def 
map_rois_to_fpn_levels(rois, k_min, k_max): 12 | """Determine which FPN level each RoI in a set of RoIs should map to based 13 | on the heuristic in the FPN paper. 14 | """ 15 | # Compute level ids 16 | areas, neg_idx = box_utils.boxes_area(rois) 17 | areas[neg_idx] = 0 18 | s = np.sqrt(areas) 19 | s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224 20 | lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4 21 | 22 | # Eqn.(1) in FPN paper 23 | target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6)) 24 | target_lvls = np.clip(target_lvls, k_min, k_max) 25 | 26 | # Mark to discard negative area roi. See utils.fpn.add_multilevel_roi_blobs 27 | target_lvls[neg_idx] = -1 28 | return target_lvls 29 | 30 | 31 | def add_multilevel_roi_blobs( 32 | blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max 33 | ): 34 | """Add RoI blobs for multiple FPN levels to the blobs dict. 35 | 36 | blobs: a dict mapping from blob name to numpy ndarray 37 | blob_prefix: name prefix to use for the FPN blobs 38 | rois: the source rois as a 2D numpy array of shape (N, 5) where each row is 39 | an roi and the columns encode (batch_idx, x1, y1, x2, y2) 40 | target_lvls: numpy array of shape (N, ) indicating which FPN level each roi 41 | in rois should be assigned to. -1 means correspoind roi should be discarded. 42 | lvl_min: the finest (highest resolution) FPN level (e.g., 2) 43 | lvl_max: the coarest (lowest resolution) FPN level (e.g., 6) 44 | """ 45 | rois_idx_order = np.empty((0, )) 46 | rois_stacked = np.zeros((0, 5), dtype=np.float32) # for assert 47 | # Delete roi entries that have negative area 48 | idx_neg = np.where(target_lvls == -1)[0] 49 | rois = np.delete(rois, idx_neg, axis=0) 50 | blobs[blob_prefix] = rois 51 | target_lvls = np.delete(target_lvls, idx_neg, axis=0) 52 | for lvl in range(lvl_min, lvl_max + 1): 53 | idx_lvl = np.where(target_lvls == lvl)[0] 54 | blobs[blob_prefix + '_fpn' + str(lvl)] = rois[idx_lvl, :] 55 | rois_idx_order = np.concatenate((rois_idx_order, idx_lvl)) 56 | rois_stacked = np.vstack( 57 | [rois_stacked, blobs[blob_prefix + '_fpn' + str(lvl)]] 58 | ) 59 | rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, copy=False) 60 | blobs[blob_prefix + '_idx_restore_int32'] = rois_idx_restore 61 | # Sanity check that restore order is correct 62 | assert (rois_stacked[rois_idx_restore] == rois).all() 63 | -------------------------------------------------------------------------------- /lib/utils/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import socket 3 | from collections import defaultdict, Iterable 4 | from datetime import datetime 5 | from copy import deepcopy 6 | from itertools import chain 7 | 8 | import torch 9 | 10 | 11 | def get_run_name(): 12 | """ A unique name for each run """ 13 | return datetime.now().strftime( 14 | '%b%d-%H-%M-%S') + '_' + socket.gethostname() 15 | 16 | 17 | def get_output_dir(args, run_name): 18 | """ Get root output directory for each run """ 19 | cfg_filename, _ = os.path.splitext(os.path.split(args.cfg_file)[1]) 20 | return os.path.join(args.output_base_dir, cfg_filename, run_name) 21 | 22 | 23 | IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm'] 24 | 25 | 26 | def is_image_file(filename): 27 | """Checks if a file is an image. 
28 | Args: 29 | filename (string): path to a file 30 | Returns: 31 | bool: True if the filename ends with a known image extension 32 | """ 33 | filename_lower = filename.lower() 34 | return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS) 35 | 36 | 37 | def get_imagelist_from_dir(dirpath): 38 | images = [] 39 | for f in os.listdir(dirpath): 40 | if is_image_file(f): 41 | images.append(f) 42 | return images 43 | 44 | 45 | def load_optimizer_state_dict(optimizer, state_dict): 46 | # deepcopy, to be consistent with module API 47 | state_dict = deepcopy(state_dict) 48 | # Validate the state_dict 49 | groups = optimizer.param_groups 50 | saved_groups = state_dict['param_groups'] 51 | 52 | if len(groups) != len(saved_groups): 53 | raise ValueError("loaded state dict has a different number of " 54 | "parameter groups") 55 | param_lens = (len(g['params']) for g in groups) 56 | saved_lens = (len(g['params']) for g in saved_groups) 57 | if any(p_len != s_len for p_len, s_len in zip(param_lens, saved_lens)): 58 | raise ValueError("loaded state dict contains a parameter group " 59 | "that doesn't match the size of optimizer's group") 60 | 61 | # Update the state 62 | id_map = {old_id: p for old_id, p in 63 | zip(chain(*(g['params'] for g in saved_groups)), 64 | chain(*(g['params'] for g in groups)))} 65 | 66 | def cast(param, value): 67 | """Make a deep copy of value, casting all tensors to device of param.""" 68 | if torch.is_tensor(value): 69 | # Floating-point types are a bit special here. They are the only ones 70 | # that are assumed to always match the type of params. 71 | if isinstance(param.data, (torch.FloatTensor, torch.cuda.FloatTensor, 72 | torch.DoubleTensor, torch.cuda.DoubleTensor, 73 | torch.HalfTensor, torch.cuda.HalfTensor)): # param.is_floating_point(): 74 | value = value.type_as(param.data) 75 | value = value.cuda(param.get_device()) if param.is_cuda else value.cpu() 76 | return value 77 | elif isinstance(value, dict): 78 | return {k: cast(param, v) for k, v in value.items()} 79 | elif isinstance(value, Iterable): 80 | return type(value)(cast(param, v) for v in value) 81 | else: 82 | return value 83 | 84 | # Copy state assigned to params (and cast tensors to appropriate types). 85 | # State that is not assigned to params is copied as is (needed for 86 | # backward compatibility). 87 | state = defaultdict(dict) 88 | for k, v in state_dict['state'].items(): 89 | if k in id_map: 90 | param = id_map[k] 91 | state[param] = cast(param, v) 92 | else: 93 | state[k] = v 94 | 95 | # Update parameter groups, setting their 'params' value 96 | def update_group(group, new_group): 97 | new_group['params'] = group['params'] 98 | return new_group 99 | param_groups = [ 100 | update_group(g, ng) for g, ng in zip(groups, saved_groups)] 101 | optimizer.__setstate__({'state': state, 'param_groups': param_groups}) 102 | -------------------------------------------------------------------------------- /lib/utils/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | 9 | from core.config import cfg 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, beta=1.0): 15 | """ 16 | SmoothL1(x) = 0.5 * x^2 / beta if |x| < beta 17 | |x| - 0.5 * beta otherwise. 
18 | 1 / N * sum_i alpha_out[i] * SmoothL1(alpha_in[i] * (y_hat[i] - y[i])). 19 | N is the number of batch elements in the input predictions 20 | """ 21 | box_diff = bbox_pred - bbox_targets 22 | in_box_diff = bbox_inside_weights * box_diff 23 | abs_in_box_diff = torch.abs(in_box_diff) 24 | smoothL1_sign = (abs_in_box_diff < beta).detach().float() 25 | in_loss_box = smoothL1_sign * 0.5 * torch.pow(in_box_diff, 2) / beta + \ 26 | (1 - smoothL1_sign) * (abs_in_box_diff - (0.5 * beta)) 27 | out_loss_box = bbox_outside_weights * in_loss_box 28 | loss_box = out_loss_box 29 | N = loss_box.size(0) # batch size 30 | loss_box = loss_box.view(-1).sum(0) / N 31 | return loss_box 32 | 33 | 34 | def clip_gradient(model, clip_norm): 35 | """Computes a gradient clipping coefficient based on gradient norm.""" 36 | totalnorm = 0 37 | for p in model.parameters(): 38 | if p.requires_grad: 39 | modulenorm = p.grad.data.norm() 40 | totalnorm += modulenorm ** 2 41 | totalnorm = np.sqrt(totalnorm) 42 | 43 | norm = clip_norm / max(totalnorm, clip_norm) 44 | for p in model.parameters(): 45 | if p.requires_grad: 46 | p.grad.mul_(norm) 47 | 48 | 49 | def decay_learning_rate(optimizer, cur_lr, decay_rate): 50 | """Decay learning rate""" 51 | new_lr = cur_lr * decay_rate 52 | # ratio = _get_lr_change_ratio(cur_lr, new_lr) 53 | ratio = 1 / decay_rate 54 | if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD: 55 | logger.info('Changing learning rate %.6f -> %.6f', cur_lr, new_lr) 56 | # Update learning rate, note that different parameter may have different learning rate 57 | for param_group in optimizer.param_groups: 58 | cur_lr = param_group['lr'] 59 | new_lr = decay_rate * param_group['lr'] 60 | param_group['lr'] = new_lr 61 | if cfg.SOLVER.TYPE in ['SGD']: 62 | if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \ 63 | ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD: 64 | _CorrectMomentum(optimizer, param_group['params'], new_lr / cur_lr) 65 | 66 | 67 | def _CorrectMomentum(optimizer, param_keys, correction): 68 | """The MomentumSGDUpdate op implements the update V as 69 | 70 | V := mu * V + lr * grad, 71 | 72 | where mu is the momentum factor, lr is the learning rate, and grad is 73 | the stochastic gradient. Since V is not defined independently of the 74 | learning rate (as it should ideally be), when the learning rate is 75 | changed we should scale the update history V in order to make it 76 | compatible in scale with lr * grad. 
77 | """ 78 | logger.info('Scaling update history by %.6f (new lr / old lr)', correction) 79 | for p_key in param_keys: 80 | optimizer.state[p_key]['momentum_buffer'] *= correction 81 | 82 | 83 | def _get_lr_change_ratio(cur_lr, new_lr): 84 | eps = 1e-10 85 | ratio = np.max( 86 | (new_lr / np.max((cur_lr, eps)), cur_lr / np.max((new_lr, eps))) 87 | ) 88 | return ratio 89 | 90 | 91 | def affine_grid_gen(rois, input_size, grid_size): 92 | 93 | rois = rois.detach() 94 | x1 = rois[:, 1::4] / 16.0 95 | y1 = rois[:, 2::4] / 16.0 96 | x2 = rois[:, 3::4] / 16.0 97 | y2 = rois[:, 4::4] / 16.0 98 | 99 | height = input_size[0] 100 | width = input_size[1] 101 | 102 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 103 | theta = torch.cat([\ 104 | (x2 - x1) / (width - 1), 105 | zero, 106 | (x1 + x2 - width + 1) / (width - 1), 107 | zero, 108 | (y2 - y1) / (height - 1), 109 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 110 | 111 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, grid_size, grid_size))) 112 | 113 | return grid 114 | 115 | 116 | def save_ckpt(output_dir, args, epoch, step, model, optimizer, iters_per_epoch): 117 | """Save checkpoint""" 118 | if args.no_save: 119 | return 120 | ckpt_dir = os.path.join(output_dir, 'ckpt') 121 | if not os.path.exists(ckpt_dir): 122 | os.makedirs(ckpt_dir) 123 | save_name = os.path.join(ckpt_dir, 'model_{}_{}.pth'.format(epoch, step)) 124 | if args.mGPUs: 125 | model = model.module 126 | model_state_dict = model.state_dict() 127 | torch.save({ 128 | 'epoch': epoch, 129 | 'step': step, 130 | 'iters_per_epoch': iters_per_epoch, 131 | 'model': model.state_dict(), 132 | 'optimizer': optimizer.state_dict()}, save_name) 133 | logger.info('save model: %s', save_name) 134 | 135 | 136 | def load_ckpt(model, ckpt): 137 | """Load checkpoint""" 138 | mapping, _ = model.detectron_weight_mapping 139 | state_dict = {} 140 | for name in ckpt: 141 | if mapping[name]: 142 | state_dict[name] = ckpt[name] 143 | model.load_state_dict(state_dict, strict=False) 144 | -------------------------------------------------------------------------------- /lib/utils/resnet_weights_helper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for converting resnet pretrained weights from other formats 3 | """ 4 | import os 5 | 6 | import torch 7 | 8 | import nn as mynn 9 | import utils.detectron_weight_helper as dwh 10 | from core.config import cfg 11 | 12 | 13 | def load_pretrained_imagenet_weights(model): 14 | """Load pretrained weights 15 | Args: 16 | num_layers: 50 for res50 and so on. 
17 | model: the generalized rcnnn module 18 | """ 19 | weights_file = os.path.join(cfg.ROOT_DIR, cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS) 20 | pretrianed_state_dict = convert_state_dict(torch.load(weights_file)) 21 | 22 | # Convert batchnorm weights 23 | for name, mod in model.named_modules(): 24 | if isinstance(mod, mynn.AffineChannel2d): 25 | if cfg.FPN.FPN_ON: 26 | pretrianed_name = name.split('.', 2)[-1] 27 | else: 28 | pretrianed_name = name.split('.', 1)[-1] 29 | bn_mean = pretrianed_state_dict[pretrianed_name + '.running_mean'] 30 | bn_var = pretrianed_state_dict[pretrianed_name + '.running_var'] 31 | scale = pretrianed_state_dict[pretrianed_name + '.weight'] 32 | bias = pretrianed_state_dict[pretrianed_name + '.bias'] 33 | std = torch.sqrt(bn_var + 1e-5) 34 | new_scale = scale / std 35 | new_bias = bias - bn_mean * scale / std 36 | pretrianed_state_dict[pretrianed_name + '.weight'] = new_scale 37 | pretrianed_state_dict[pretrianed_name + '.bias'] = new_bias 38 | 39 | model_state_dict = model.state_dict() 40 | 41 | pattern = dwh.resnet_weights_name_pattern() 42 | 43 | name_mapping, _ = model.detectron_weight_mapping 44 | 45 | for k, v in name_mapping.items(): 46 | if v is not None: 47 | if pattern.match(v): 48 | if cfg.FPN.FPN_ON: 49 | pretrianed_key = k.split('.', 2)[-1] 50 | else: 51 | pretrianed_key = k.split('.', 1)[-1] 52 | model_state_dict[k].copy_(pretrianed_state_dict[pretrianed_key]) 53 | 54 | 55 | def convert_state_dict(src_dict): 56 | """Return the correct mapping of tensor name and value 57 | 58 | Mapping from the names of torchvision model to our resnet conv_body and box_head. 59 | """ 60 | dst_dict = {} 61 | for k, v in src_dict.items(): 62 | toks = k.split('.') 63 | if k.startswith('layer'): 64 | assert len(toks[0]) == 6 65 | res_id = int(toks[0][5]) + 1 66 | name = '.'.join(['res%d' % res_id] + toks[1:]) 67 | dst_dict[name] = v 68 | elif k.startswith('fc'): 69 | continue 70 | else: 71 | name = '.'.join(['res1'] + toks) 72 | dst_dict[name] = v 73 | return dst_dict 74 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import time 7 | 8 | 9 | class Timer(object): 10 | """A simple timer.""" 11 | 12 | def __init__(self): 13 | self.reset() 14 | 15 | def tic(self): 16 | # using time.time instead of time.clock because time time.clock 17 | # does not normalize for multithreading 18 | self.start_time = time.time() 19 | 20 | def toc(self, average=True): 21 | self.diff = time.time() - self.start_time 22 | self.total_time += self.diff 23 | self.calls += 1 24 | self.average_time = self.total_time / self.calls 25 | if average: 26 | return self.average_time 27 | else: 28 | return self.diff 29 | 30 | def reset(self): 31 | self.total_time = 0. 32 | self.calls = 0 33 | self.start_time = 0. 34 | self.diff = 0. 35 | self.average_time = 0. 36 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | """Add {PROJECT_ROOT}/lib. 
to PYTHONPATH 2 | 3 | Usage: 4 | import this module before import any modules under lib/ 5 | e.g 6 | import _init_paths 7 | from core.config import cfg 8 | """ 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | 14 | def add_path(path): 15 | if path not in sys.path: 16 | sys.path.insert(0, path) 17 | 18 | this_dir = osp.abspath(osp.dirname(osp.dirname(__file__))) 19 | 20 | # Add lib to PYTHONPATH 21 | lib_path = osp.join(this_dir, 'lib') 22 | add_path(lib_path) 23 | -------------------------------------------------------------------------------- /tools/download_imagenet_weights.py: -------------------------------------------------------------------------------- 1 | """Script to downlaod ImageNet pretrained weights from Google Drive 2 | 3 | Extra packages required to run the script: 4 | colorama, argparse_color_formatter 5 | """ 6 | 7 | import argparse 8 | import os 9 | import requests 10 | from argparse_color_formatter import ColorHelpFormatter 11 | from colorama import init, Fore 12 | 13 | import _init_paths # pylint: disable=unused-import 14 | from core.config import cfg 15 | 16 | 17 | def parse_args(): 18 | """Parser command line argumnets""" 19 | parser = argparse.ArgumentParser(formatter_class=ColorHelpFormatter) 20 | parser.add_argument('--output_dir', help='Directory to save downloaded weight files', 21 | default=os.path.join(cfg.DATA_DIR, 'pretrained_model')) 22 | parser.add_argument('-t', '--targets', nargs='+', metavar='file_name', 23 | help='Files to download. Allowed values are: ' + 24 | ', '.join(map(lambda s: Fore.YELLOW + s + Fore.RESET, 25 | list(PRETRAINED_WEIGHTS.keys()))), 26 | choices=list(PRETRAINED_WEIGHTS.keys()), 27 | default=list(PRETRAINED_WEIGHTS.keys())) 28 | return parser.parse_args() 29 | 30 | 31 | # ---------------------------------------------------------------------------- # 32 | # Mapping from filename to google drive file_id 33 | # ---------------------------------------------------------------------------- # 34 | PRETRAINED_WEIGHTS = { 35 | 'resnet50_caffe.pth': '1wHSvusQ1CiEMc5Nx5R8adqoHQjIDWXl1', 36 | 'resnet101_caffe.pth': '1x2fTMqLrn63EMW0VuK4GEa2eQKzvJ_7l', 37 | 'resnet152_caffe.pth': '1NSCycOb7pU0KzluH326zmyMFUU55JslF', 38 | 'vgg16_caffe.pth': '19UphT53C0Ua9JAtICnw84PPTa3sZZ_9k', 39 | } 40 | 41 | 42 | # ---------------------------------------------------------------------------- # 43 | # Helper fucntions for download file from google drive 44 | # ---------------------------------------------------------------------------- # 45 | 46 | def download_file_from_google_drive(id, destination): 47 | URL = "https://docs.google.com/uc?export=download" 48 | 49 | session = requests.Session() 50 | 51 | response = session.get(URL, params={'id': id}, stream=True) 52 | token = get_confirm_token(response) 53 | 54 | if token: 55 | params = {'id': id, 'confirm': token} 56 | response = session.get(URL, params=params, stream=True) 57 | 58 | save_response_content(response, destination) 59 | 60 | 61 | def get_confirm_token(response): 62 | for key, value in response.cookies.items(): 63 | if key.startswith('download_warning'): 64 | return value 65 | 66 | return None 67 | 68 | 69 | def save_response_content(response, destination): 70 | CHUNK_SIZE = 32768 71 | 72 | with open(destination, "wb") as f: 73 | for chunk in response.iter_content(CHUNK_SIZE): 74 | if chunk: # filter out keep-alive new chunks 75 | f.write(chunk) 76 | 77 | 78 | def main(): 79 | init() # colorama init. 
Only has effect on Windows 80 | args = parse_args() 81 | for filename in args.targets: 82 | file_id = PRETRAINED_WEIGHTS[filename] 83 | if not os.path.exists(args.output_dir): 84 | os.makedirs(args.output_dir) 85 | destination = os.path.join(args.output_dir, filename) 86 | download_file_from_google_drive(file_id, destination) 87 | print('Download {} to {}'.format(filename, destination)) 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /tools/infer_simple.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import distutils.util 7 | import os 8 | import sys 9 | import pprint 10 | import subprocess 11 | from collections import defaultdict 12 | from six.moves import xrange 13 | 14 | # Use a non-interactive backend 15 | import matplotlib 16 | matplotlib.use('Agg') 17 | 18 | import numpy as np 19 | import cv2 20 | 21 | import torch 22 | import torch.nn as nn 23 | from torch.autograd import Variable 24 | 25 | import _init_paths 26 | import nn as mynn 27 | from core.config import cfg, cfg_from_file, cfg_from_list, assert_and_infer_cfg 28 | from core.test import im_detect_all 29 | from modeling.model_builder import Generalized_RCNN 30 | import datasets.dummy_datasets as datasets 31 | import utils.misc as misc_utils 32 | import utils.vis as vis_utils 33 | from utils.detectron_weight_helper import load_detectron_weight 34 | from utils.timer import Timer 35 | 36 | 37 | def parse_args(): 38 | """Parse in command line arguments""" 39 | parser = argparse.ArgumentParser(description='Demonstrate mask-rcnn results') 40 | parser.add_argument( 41 | '--dataset', required=True, 42 | help='training dataset') 43 | 44 | parser.add_argument( 45 | '--cfg', dest='cfg_file', required=True, 46 | help='optional config file') 47 | parser.add_argument( 48 | '--set', dest='set_cfgs', 49 | help='set config keys, will overwrite config in the cfg_file', 50 | default=[], nargs='+') 51 | 52 | parser.add_argument( 53 | '--no_cuda', dest='cuda', help='whether use CUDA', action='store_false') 54 | 55 | parser.add_argument('--load_ckpt', help='path of checkpoint to load') 56 | parser.add_argument( 57 | '--load_detectron', help='path to the detectron weight pickle file') 58 | 59 | parser.add_argument( 60 | '--image_dir', 61 | help='directory to load images for demo') 62 | parser.add_argument( 63 | '--images', nargs='+', 64 | help='images to infer. 
Must not use with --image_dir') 65 | parser.add_argument( 66 | '--output_dir', 67 | help='directory to save demo results', 68 | default="infer_outputs") 69 | parser.add_argument( 70 | '--merge_pdfs', type=distutils.util.strtobool, default=True) 71 | 72 | args = parser.parse_args() 73 | 74 | return args 75 | 76 | 77 | def main(): 78 | """main function""" 79 | 80 | if not torch.cuda.is_available(): 81 | sys.exit("Need a CUDA device to run the code.") 82 | 83 | args = parse_args() 84 | print('Called with args:') 85 | print(args) 86 | 87 | assert args.image_dir or args.images 88 | assert bool(args.image_dir) ^ bool(args.images) 89 | 90 | if args.dataset.startswith("coco"): 91 | dataset = datasets.get_coco_dataset() 92 | cfg.MODEL.NUM_CLASSES = len(dataset.classes) 93 | elif args.dataset.startswith("keypoints_coco"): 94 | dataset = datasets.get_coco_dataset() 95 | cfg.MODEL.NUM_CLASSES = 2 96 | else: 97 | raise ValueError('Unexpected dataset name: {}'.format(args.dataset)) 98 | 99 | print('load cfg from file: {}'.format(args.cfg_file)) 100 | cfg_from_file(args.cfg_file) 101 | 102 | if args.set_cfgs is not None: 103 | cfg_from_list(args.set_cfgs) 104 | 105 | assert args.load_ckpt or args.load_detectron 106 | cfg.RESNETS.IMAGENET_PRETRAINED = False # Don't need to load imagenet pretrained weights 107 | assert_and_infer_cfg() 108 | 109 | maskRCNN = Generalized_RCNN() 110 | 111 | if args.cuda: 112 | maskRCNN.cuda() 113 | 114 | if args.load_ckpt: 115 | load_name = args.load_ckpt 116 | print("loading checkpoint %s" % (load_name)) 117 | checkpoint = torch.load(load_name) 118 | maskRCNN.load_state_dict(checkpoint['model'], strict=False) 119 | 120 | if args.load_detectron: 121 | print("loading detectron weights %s" % args.load_detectron) 122 | load_detectron_weight(maskRCNN, args.load_detectron) 123 | 124 | maskRCNN = mynn.DataParallel(maskRCNN, cpu_keywords=['im_info', 'roidb'], 125 | minibatch=True) 126 | 127 | maskRCNN.eval() 128 | if args.image_dir: 129 | imglist = misc_utils.get_imagelist_from_dir(args.image_dir) 130 | else: 131 | imglist = args.images 132 | num_images = len(imglist) 133 | if not os.path.exists(args.output_dir): 134 | os.makedirs(args.output_dir) 135 | 136 | for i in xrange(num_images): 137 | print('img', i) 138 | im = cv2.imread(imglist[i]) 139 | 140 | timers = defaultdict(Timer) 141 | 142 | cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN, im, timers=timers) 143 | 144 | im_name, _ = os.path.splitext(os.path.basename(imglist[i])) 145 | vis_utils.vis_one_image( 146 | im[:, :, ::-1], # BGR -> RGB for visualization 147 | im_name, 148 | args.output_dir, 149 | cls_boxes, 150 | cls_segms, 151 | cls_keyps, 152 | dataset=dataset, 153 | box_alpha=0.3, 154 | show_class=True, 155 | thresh=0.7, 156 | kp_thresh=2 157 | ) 158 | 159 | if args.merge_pdfs and num_images > 1: 160 | merge_out_path = '{}/results.pdf'.format(args.output_dir) 161 | if os.path.exists(merge_out_path): 162 | os.remove(merge_out_path) 163 | command = "pdfunite {}/*.pdf {}".format(args.output_dir, 164 | merge_out_path) 165 | subprocess.call(command, shell=True) 166 | 167 | 168 | if __name__ == '__main__': 169 | main() 170 | --------------------------------------------------------------------------------
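For reference, the per-image core of main() in tools/infer_simple.py above, stripped of argument parsing and checkpoint loading. This is a hedged sketch, not part of the repository: it assumes a built, weight-loaded maskRCNN (as constructed in the preceding file), and the image/output paths are placeholders. A typical invocation of the full script passes --dataset coco, a --cfg yaml, one of --load_ckpt / --load_detectron, and either --image_dir or --images.

    import os
    from collections import defaultdict

    import cv2

    import _init_paths  # noqa: F401  (puts lib/ on sys.path)
    import datasets.dummy_datasets as datasets
    import utils.vis as vis_utils
    from core.test import im_detect_all
    from utils.timer import Timer


    def run_one_image(maskRCNN, image_path, output_dir='infer_outputs'):
        """Detect and visualize a single image, as main() above does per image."""
        dataset = datasets.get_coco_dataset()
        im = cv2.imread(image_path)  # BGR, as expected by im_detect_all
        timers = defaultdict(Timer)
        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN, im, timers=timers)
        im_name, _ = os.path.splitext(os.path.basename(image_path))
        vis_utils.vis_one_image(
            im[:, :, ::-1],          # BGR -> RGB for visualization
            im_name, output_dir,
            cls_boxes, cls_segms, cls_keyps,
            dataset=dataset, box_alpha=0.3, show_class=True,
            thresh=0.7, kp_thresh=2)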