├── .github └── issue_template.md ├── .gitignore ├── .pylintrc ├── .travis.yml ├── .vscode └── settings.json ├── BENCHMARK.md ├── LICENSE ├── README.md ├── configs ├── baselines │ ├── e2e_faster_rcnn_R-101-FPN_1x.yaml │ ├── e2e_faster_rcnn_R-101-FPN_2x.yaml │ ├── e2e_faster_rcnn_R-50-C4_1x.yaml │ ├── e2e_faster_rcnn_R-50-C4_2x.yaml │ ├── e2e_faster_rcnn_R-50-FPN_1x.yaml │ ├── e2e_faster_rcnn_R-50-FPN_2x.yaml │ ├── e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml │ ├── e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml │ ├── e2e_keypoint_rcnn_R-101-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_R-101-FPN_s1x.yaml │ ├── e2e_keypoint_rcnn_R-50-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_R-50-FPN_s1x.yaml │ ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml │ ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml │ ├── e2e_mask_rcnn_R-101-C4_2x.yaml │ ├── e2e_mask_rcnn_R-101-FPN_1x.yaml │ ├── e2e_mask_rcnn_R-101-FPN_2x.yaml │ ├── e2e_mask_rcnn_R-50-C4_1x.yaml │ ├── e2e_mask_rcnn_R-50-C4_2x.yaml │ ├── e2e_mask_rcnn_R-50-FPN_1x.yaml │ ├── e2e_mask_rcnn_R-50-FPN_2x.yaml │ ├── e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml │ ├── e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml │ └── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml ├── getting_started │ ├── tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml │ ├── tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml │ ├── tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml │ └── tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml └── gn_baselines │ ├── e2e_mask_rcnn_R-101-FPN_2x_gn.yaml │ ├── e2e_mask_rcnn_R-101-FPN_3x_gn.yaml │ ├── e2e_mask_rcnn_R-50-FPN_2x_gn.yaml │ ├── e2e_mask_rcnn_R-50-FPN_3x_gn.yaml │ ├── scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml │ └── scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml ├── demo ├── 
33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg ├── 33823288584_1d21cf0a26_k-detectron-R50-C4.jpg ├── 33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg ├── 33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg ├── 33823288584_1d21cf0a26_k.jpg ├── convert_pdf2img.sh ├── e2e_mask_rcnn_R-50-C4 │ └── train_from_scratch_epoch1_bs4 │ │ ├── img1.jpg │ │ ├── img2.jpg │ │ ├── img3.jpg │ │ └── img4.jpg ├── img1_keypoints-detectron-R50-FPN.jpg ├── img1_keypoints-pydetectron-R50-FPN.jpg ├── img2_keypoints-detectron-R50-FPN.jpg ├── img2_keypoints-pydetectron-R50-FPN.jpg ├── loss_cmp_of_e2e_faster_rcnn_R-50-FPN_1x.jpg ├── loss_cmp_of_e2e_keypoint_rcnn_R-50-FPN_1x.jpg ├── loss_cmp_of_e2e_mask_rcnn_R-50-FPN_1x.jpg ├── loss_e2e_keypoint_rcnn_R-50-FPN_1x_bs8.jpg ├── loss_e2e_mask_rcnn_R-50-FPN_1x_bs16.jpg ├── loss_e2e_mask_rcnn_R-50-FPN_1x_bs6.jpg ├── sample_images │ ├── img1.jpg │ ├── img2.jpg │ ├── img3.jpg │ └── img4.jpg └── sample_images_keypoints │ ├── img1_keypoints.jpg │ └── img2_keypoints.jpg ├── lib ├── core │ ├── __init__.py │ ├── config.py │ ├── test.py │ └── test_engine.py ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── cityscapes │ │ ├── __init__.py │ │ ├── coco_to_cityscapes_id.py │ │ └── tools │ │ │ ├── convert_cityscapes_to_coco.py │ │ │ └── convert_coco_model_to_cityscapes.py │ ├── cityscapes_json_dataset_evaluator.py │ ├── dataset_catalog.py │ ├── dummy_datasets.py │ ├── json_dataset.py │ ├── json_dataset_evaluator.py │ ├── roidb.py │ ├── task_evaluation.py │ ├── voc_dataset_evaluator.py │ └── voc_eval.py ├── make.sh ├── model │ ├── __init__.py │ ├── nms │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── nms │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── make.sh │ │ ├── nms_gpu.py │ │ ├── nms_kernel.cu │ │ ├── nms_wrapper.py │ │ └── src │ │ │ ├── nms_cuda.c │ │ │ ├── nms_cuda.h │ │ │ ├── nms_cuda_kernel.cu │ │ │ └── nms_cuda_kernel.h │ ├── roi_align │ │ ├── 
__init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_align │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── make.sh │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.c │ │ │ ├── roi_align_cuda.h │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel.h │ ├── roi_crop │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ ├── crop_resize │ │ │ │ ├── __init__.py │ │ │ │ └── _crop_resize.so │ │ │ └── roi_crop │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ ├── crop_resize.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ ├── make.sh │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ └── src │ │ │ ├── roi_crop.c │ │ │ ├── roi_crop.h │ │ │ ├── roi_crop_cuda.c │ │ │ ├── roi_crop_cuda.h │ │ │ ├── roi_crop_cuda_kernel.cu │ │ │ └── roi_crop_cuda_kernel.h │ ├── roi_pooling │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_pooling │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ └── src │ │ │ ├── roi_pooling.c │ │ │ ├── roi_pooling.h │ │ │ ├── roi_pooling_cuda.c │ │ │ ├── roi_pooling_cuda.h │ │ │ ├── roi_pooling_kernel.cu │ │ │ └── roi_pooling_kernel.h │ └── utils │ │ ├── .gitignore │ │ ├── __init__.py │ │ └── net_utils.py ├── modeling │ ├── FPN.py │ ├── ResNet.py │ ├── __init__.py │ ├── collect_and_distribute_fpn_rpn_proposals.py │ ├── fast_rcnn_heads.py │ ├── generate_anchors.py │ ├── generate_proposal_labels.py │ ├── generate_proposals.py │ ├── keypoint_rcnn_heads.py │ ├── mask_rcnn_heads.py │ ├── model_builder.py │ ├── roi_xfrom │ │ ├── __init__.py │ │ └── roi_align │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_align │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── 
roi_align.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ │ └── src │ │ │ ├── roi_align_cuda.c │ │ │ ├── roi_align_cuda.h │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel.h │ └── rpn_heads.py ├── nn │ ├── __init__.py │ ├── functional.py │ ├── init.py │ ├── modules │ │ ├── __init__.py │ │ ├── affine.py │ │ ├── normalization.py │ │ └── upsample.py │ └── parallel │ │ ├── __init__.py │ │ ├── _functions.py │ │ ├── data_parallel.py │ │ ├── parallel_apply.py │ │ ├── replicate.py │ │ └── scatter_gather.py ├── roi_data │ ├── __init__.py │ ├── data_utils.py │ ├── fast_rcnn.py │ ├── keypoint_rcnn.py │ ├── loader.py │ ├── mask_rcnn.py │ ├── minibatch.py │ └── rpn.py ├── setup.py └── utils │ ├── __init__.py │ ├── blob.py │ ├── boxes.py │ ├── collections.py │ ├── colormap.py │ ├── cython_bbox.c │ ├── cython_bbox.pyx │ ├── cython_nms.c │ ├── cython_nms.pyx │ ├── detectron_weight_helper.py │ ├── env.py │ ├── fpn.py │ ├── image.py │ ├── io.py │ ├── keypoints.py │ ├── logging.py │ ├── misc.py │ ├── net.py │ ├── resnet_weights_helper.py │ ├── segms.py │ ├── subprocess.py │ ├── timer.py │ ├── training_stats.py │ └── vis.py └── tools ├── _init_paths.py ├── download_imagenet_weights.py ├── infer_simple.py ├── test_net.py ├── train_net.py └── train_net_step.py /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | ## PLEASE FOLLOW THESE INSTRUCTIONS BEFORE POSTING 2 | 1. **Read the README.md thoroughly ! README.md is not a decoration.** 3 | 2. Please search existing *open and closed* issues in case your issue has already been reported 4 | 3. Please try to debug the issue in case you can solve it on your own before posting 5 | 6 | ## After following steps above and agreeing to provide the detailed information requested below, you may continue with posting your issue 7 | (**Delete this line and the text above it.**) 8 | 9 | ### Expected results 10 | 11 | What did you expect to see? 
12 | 13 | ### Actual results 14 | 15 | What did you observe instead? 16 | 17 | ### Detailed steps to reproduce 18 | 19 | E.g.: 20 | 21 | ``` 22 | The command that you ran 23 | ``` 24 | 25 | ### System information 26 | 27 | * Operating system: ? 28 | * CUDA version: ? 29 | * cuDNN version: ? 30 | * GPU models (for all devices if they are not all the same): ? 31 | * python version: ? 32 | * pytorch version: ? 33 | * Anything else that seems relevant: ? 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | *.pyc 3 | *~ 4 | 5 | *.o 6 | *.so 7 | 8 | .ipynb_checkpoints 9 | notebooks/*.pkl 10 | 11 | /Outputs 12 | 13 | # ------------------------------ 14 | 15 | .vscode/* 16 | !.vscode/settings.json 17 | !.vscode/tasks.json 18 | !.vscode/launch.json 19 | !.vscode/extensions.json 20 | 21 | # General 22 | .DS_Store 23 | .AppleDouble 24 | .LSOverride 25 | 26 | # Icon must end with two \r 27 | Icon 28 | 29 | # Thumbnails 30 | ._* 31 | 32 | # Files that might appear in the root of a volume 33 | .DocumentRevisions-V100 34 | .fseventsd 35 | .Spotlight-V100 36 | .TemporaryItems 37 | .Trashes 38 | .VolumeIcon.icns 39 | .com.apple.timemachine.donotpresent 40 | 41 | # Directories potentially created on remote AFP share 42 | .AppleDB 43 | .AppleDesktop 44 | Network Trash Folder 45 | Temporary Items 46 | .apdisk 47 | 48 | *~ 49 | 50 | # temporary files which can be created if a process still has a handle open of a deleted file 51 | .fuse_hidden* 52 | 53 | # KDE directory preferences 54 | .directory 55 | 56 | # Linux trash folder which might appear on any partition or disk 57 | .Trash-* 58 | 59 | # .nfs files are created when an open file is removed but is still being accessed 60 | .nfs* 61 | -------------------------------------------------------------------------------- /.pylintrc: 
-------------------------------------------------------------------------------- 1 | [MASTER] 2 | extension-pkg-whitelist=numpy,torch,cv2 3 | init-hook="sys.path.insert(0, './tools'); import _init_paths" 4 | 5 | [MESSAGES CONTROL] 6 | disable=wrong-import-position 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | group: travis_latest 2 | language: python 3 | cache: pip 4 | python: 5 | - 3.6 6 | #- nightly 7 | #- pypy3 8 | matrix: 9 | allow_failures: 10 | - python: nightly 11 | - python: pypy3 12 | install: 13 | #- pip install -r requirements.txt 14 | - pip install flake8 # pytest # add another testing frameworks later 15 | before_script: 16 | # stop the build if there are Python syntax errors or undefined names 17 | - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics 18 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 19 | - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 20 | script: 21 | - true # pytest --capture=sys # add other tests here 22 | notifications: 23 | on_success: change 24 | on_failure: change # `always` will be the setting once code changes slow down 25 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintEnabled": true, 3 | "python.linting.flake8Enabled": false, 4 | "python.autoComplete.extraPaths": ["${workspaceRoot}/lib"], 5 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Roy Tseng 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | 36 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 
512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | RPN: 17 | SIZES: (32, 64, 128, 256, 512) 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | TRAIN: 22 | SCALES: (800,) 23 | MAX_SIZE: 1333 24 | IMS_PER_BATCH: 1 25 | BATCH_SIZE_PER_IM: 512 26 | TEST: 27 | SCALE: 800 28 | MAX_SIZE: 1333 29 | NMS: 0.5 30 | RPN_PRE_NMS_TOP_N: 6000 31 | RPN_POST_NMS_TOP_N: 1000 32 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 14 | MAX_ITER: 360000 15 | STEPS: [0, 240000, 320000] 16 | RPN: 17 | SIZES: (32, 64, 128, 256, 512) 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | TRAIN: 22 | SCALES: (800,) 23 | 
MAX_SIZE: 1333 24 | IMS_PER_BATCH: 1 25 | BATCH_SIZE_PER_IM: 512 26 | TEST: 27 | SCALE: 800 28 | MAX_SIZE: 1333 29 | NMS: 0.5 30 | RPN_PRE_NMS_TOP_N: 6000 31 | RPN_POST_NMS_TOP_N: 1000 32 | 33 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: 
fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: 
FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 360000 13 | STEPS: [0, 240000, 320000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 
26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 360000 13 | STEPS: [0, 240000, 320000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: 
FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 130000 15 | STEPS: [0, 100000, 120000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | 
FAST_RCNN: 21 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * 
UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 130000 15 | STEPS: [0, 100000, 120000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | 
RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | 
MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 130000 13 | STEPS: [0, 100000, 120000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 
90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 130000 13 | STEPS: [0, 100000, 120000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; 
False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-101-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet101_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 360000 16 | STEPS: [0, 240000, 320000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: 
MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | RESNETS: 8 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 9 | NUM_GPUS: 8 10 | SOLVER: 11 | WEIGHT_DECAY: 0.0001 12 | LR_POLICY: steps_with_decay 13 | BASE_LR: 0.02 14 | GAMMA: 0.1 15 | MAX_ITER: 90000 16 | STEPS: [0, 60000, 80000] 17 | FPN: 18 | FPN_ON: True 19 | MULTILEVEL_ROIS: True 20 | MULTILEVEL_RPN: True 21 | FAST_RCNN: 22 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 23 | ROI_XFORM_METHOD: RoIAlign 24 | ROI_XFORM_RESOLUTION: 7 25 | ROI_XFORM_SAMPLING_RATIO: 2 26 | MRCNN: 27 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 28 | RESOLUTION: 28 # (output mask resolution) default 14 29 | ROI_XFORM_METHOD: RoIAlign 30 | ROI_XFORM_RESOLUTION: 14 # default 7 31 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 32 | DILATION: 1 # default 2 33 | CONV_INIT: MSRAFill # default GaussianFill 34 | TRAIN: 35 | SCALES: (800,) 36 | MAX_SIZE: 1333 37 | BATCH_SIZE_PER_IM: 512 38 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 39 | TEST: 40 | SCALE: 800 41 | MAX_SIZE: 1333 42 | NMS: 0.5 43 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 44 | RPN_POST_NMS_TOP_N: 1000 45 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: 
FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 180000 16 | STEPS: [0, 120000, 160000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: 
mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 360000 16 | STEPS: [0, 240000, 320000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 
5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: 
mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 32 24 | WIDTH_PER_GROUP: 8 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | 
MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 32 24 | WIDTH_PER_GROUP: 8 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | 
-------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 64 24 | WIDTH_PER_GROUP: 4 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: 
True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 64 24 | WIDTH_PER_GROUP: 4 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet152_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | # 1.44x schedule (note TRAIN.IMS_PER_BATCH: 1) 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | MAX_ITER: 260000 15 | STEPS: [0, 200000, 240000] 16 | FPN: 17 | FPN_ON: True 18 | 
MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | RESNETS: 21 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-152-32x8d-IN5k.pkl' 22 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 23 | TRANS_FUNC: bottleneck_transformation 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (640, 672, 704, 736, 768, 800) # Scale jitter 41 | MAX_SIZE: 1333 42 | IMS_PER_BATCH: 1 43 | BATCH_SIZE_PER_IM: 512 44 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 45 | TEST: 46 | SCALE: 800 47 | MAX_SIZE: 1333 48 | NMS: 0.5 49 | BBOX_VOTE: 50 | ENABLED: True 51 | VOTE_TH: 0.9 52 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 53 | RPN_POST_NMS_TOP_N: 1000 54 | BBOX_AUG: 55 | ENABLED: True 56 | SCORE_HEUR: UNION 57 | COORD_HEUR: UNION 58 | H_FLIP: True 59 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 60 | MAX_SIZE: 2000 61 | SCALE_H_FLIP: True 62 | SCALE_SIZE_DEP: False 63 | ASPECT_RATIOS: () 64 | ASPECT_RATIO_H_FLIP: False 65 | MASK_AUG: 66 | ENABLED: True 67 | HEUR: SOFT_AVG 68 | H_FLIP: True 69 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 70 | MAX_SIZE: 2000 71 | SCALE_H_FLIP: True 72 | SCALE_SIZE_DEP: False 73 | ASPECT_RATIOS: () 74 | ASPECT_RATIO_H_FLIP: False 75 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: 
FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 1 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.0025 12 | GAMMA: 0.1 13 | MAX_ITER: 60000 14 | STEPS: [0, 30000, 40000] 15 | # Equivalent schedules with... 16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 2 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.005 12 | GAMMA: 0.1 13 | MAX_ITER: 30000 14 | STEPS: [0, 15000, 20000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | 53 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 4 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 15000 14 | STEPS: [0, 7500, 10000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 7500 14 | STEPS: [0, 3750, 5000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-101-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this 
is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 270000 13 | STEPS: [0, 210000, 250000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-101-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | 
MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-50-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 
28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 270000 13 | STEPS: [0, 210000, 250000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-50-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | 
ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | LOAD_IMAGENET_PRETRAINED_WEIGHTS: False 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 270000 14 | STEPS: [0, 210000, 250000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | USE_GN: True # Note: use GN on the FPN-specific layers 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | 
SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | LOAD_IMAGENET_PRETRAINED_WEIGHTS: False 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 270000 14 | STEPS: [0, 210000, 250000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | USE_GN: True # Note: use GN on the FPN-specific layers 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | 
MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-detectron-R50-C4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/33823288584_1d21cf0a26_k-detectron-R50-C4.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/33823288584_1d21cf0a26_k.jpg 
#!/bin/bash
# Convert every img*.pdf in a directory (selected with -d) to a JPEG of the
# same basename, using ImageMagick's `convert` at 300 DPI / quality 95.

pdfdir=''

while getopts 'd:' flag; do
  case "$flag" in
    d) pdfdir=$OPTARG ;;
    *) echo "usage: $0 -d <pdf-directory>" >&2; exit 1 ;;
  esac
done

# Glob directly instead of parsing `ls` output, and quote every expansion so
# paths containing spaces survive word splitting.
for pdf in "${pdfdir}"/img*.pdf; do
  # If nothing matched, the glob stays literal; skip it.
  [ -e "$pdf" ] || continue
  fname="${pdf%.*}"
  convert -density 300x300 -quality 95 "$pdf" "${fname}.jpg"
done
https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img4.jpg -------------------------------------------------------------------------------- /demo/img1_keypoints-detectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/img1_keypoints-detectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img1_keypoints-pydetectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/img1_keypoints-pydetectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img2_keypoints-detectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/img2_keypoints-detectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img2_keypoints-pydetectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/img2_keypoints-pydetectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/loss_cmp_of_e2e_faster_rcnn_R-50-FPN_1x.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/loss_cmp_of_e2e_faster_rcnn_R-50-FPN_1x.jpg 
-------------------------------------------------------------------------------- /demo/loss_cmp_of_e2e_keypoint_rcnn_R-50-FPN_1x.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/loss_cmp_of_e2e_keypoint_rcnn_R-50-FPN_1x.jpg -------------------------------------------------------------------------------- /demo/loss_cmp_of_e2e_mask_rcnn_R-50-FPN_1x.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/loss_cmp_of_e2e_mask_rcnn_R-50-FPN_1x.jpg -------------------------------------------------------------------------------- /demo/loss_e2e_keypoint_rcnn_R-50-FPN_1x_bs8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/loss_e2e_keypoint_rcnn_R-50-FPN_1x_bs8.jpg -------------------------------------------------------------------------------- /demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs16.jpg -------------------------------------------------------------------------------- /demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs6.jpg -------------------------------------------------------------------------------- /demo/sample_images/img1.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/sample_images/img1.jpg -------------------------------------------------------------------------------- /demo/sample_images/img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/sample_images/img2.jpg -------------------------------------------------------------------------------- /demo/sample_images/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/sample_images/img3.jpg -------------------------------------------------------------------------------- /demo/sample_images/img4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/sample_images/img4.jpg -------------------------------------------------------------------------------- /demo/sample_images_keypoints/img1_keypoints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/sample_images_keypoints/img1_keypoints.jpg -------------------------------------------------------------------------------- /demo/sample_images_keypoints/img2_keypoints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/sample_images_keypoints/img2_keypoints.jpg -------------------------------------------------------------------------------- 
function VOCopts = get_voc_opts(path)
% Load the PASCAL VOC devkit options struct (VOCopts) from a devkit rooted
% at `path`, by running the devkit's own VOCinit script.
%
% Side effects are carefully undone: the function cd's into the devkit and
% temporarily adds its VOCcode directory to the MATLAB path, then restores
% both the working directory and the path before returning (also on error).

tmp = pwd;           % remember caller's working directory
cd(path);            % VOCinit expects to run from the devkit root
try
  addpath('VOCcode');
  VOCinit;           % defines VOCopts in this workspace
catch
  % Clean up before reporting, so a failed call leaves no state behind.
  rmpath('VOCcode');
  cd(tmp);
  error(sprintf('VOCcode directory not found under %s', path));
end
rmpath('VOCcode');
cd(tmp);
function ap = xVOCap(rec,prec)
% From the PASCAL VOC 2011 devkit
%
% Compute average precision as the area under the precision/recall curve,
% with precision made monotonically non-increasing first (the VOC 2010+
% "all points" interpolation, as opposed to the older 11-point sampling).

% Pad the curve so it spans recall 0..1.
mrec=[0 ; rec ; 1];
mpre=[0 ; prec ; 0];
% Right-to-left max: replace each precision with the best precision at any
% recall >= this one (monotone envelope).
for i=numel(mpre)-1:-1:1
    mpre(i)=max(mpre(i),mpre(i+1));
end
% Indices where recall actually increases; sum rectangle areas there.
i=find(mrec(2:end)~=mrec(1:end-1))+1;
ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
def cityscapes_to_coco(cityscapes_id):
    """Map a (converted-json) Cityscapes category id to its COCO category id.

    Returns -1 when the Cityscapes class has no COCO counterpart, which
    callers treat as "randomly initialize these weights".
    """
    coco_id_of = {
        0: 0,   # background stays background
        1: 2,   # bicycle
        2: 3,   # car
        3: 1,   # person
        4: 7,   # train
        5: 8,   # truck
        6: 4,   # motorcycle
        7: 6,   # bus
        8: -1,  # rider: no COCO class (-1 means rand init)
    }
    return coco_id_of[cityscapes_id]
def cityscapes_to_coco_without_person_rider(cityscapes_id):
    """Map a Cityscapes category id to a COCO category id, ignoring humans.

    Like cityscapes_to_coco, but both 'person' and 'rider' map to -1
    (randomly initialized) instead of COCO's person class.
    """
    mapping = {
        0: 0,   # background
        1: 2,   # bicycle
        2: 3,   # car
        3: -1,  # person (ignore)
        4: 7,   # train
        5: 8,   # truck
        6: 4,   # motorcycle
        7: 6,   # bus
        8: -1,  # rider (ignore)
    }
    return mapping[cityscapes_id]
def convert_coco_blobs_to_cityscape_blobs(model_dict, convert_func=None):
    """Convert, in place, every class-indexed blob in model_dict['blobs']
    from COCO's 81-class layout to the Cityscapes 9-class layout.

    A blob is treated as class-indexed when its leading dimension is
    NUM_COCO_CLS (cls weights/biases) or 4 * NUM_COCO_CLS (per-class
    box-regression weights/biases); all other blobs are left untouched.

    Args:
        model_dict: dict with a 'blobs' mapping of blob name -> ndarray.
        convert_func: name of the id-mapping function in
            datasets.cityscapes.coco_to_cityscapes_id. Defaults to the
            command-line selection for backward compatibility.
    """
    if convert_func is None:
        # The original read the module-level `args` (set only under
        # __main__) directly, so importing this module and calling the
        # function raised NameError. Keep that as the fallback.
        convert_func = args.convert_func
    for k, v in model_dict['blobs'].items():
        if v.shape[0] in (NUM_COCO_CLS, 4 * NUM_COCO_CLS):
            # `v` is the blob itself; no need to re-fetch model_dict['blobs'][k].
            print(
                'Converting COCO blob {} with shape {}'.format(k, v.shape)
            )
            cs_blob = convert_coco_blob_to_cityscapes_blob(v, convert_func)
            print(' -> converted shape {}'.format(cs_blob.shape))
            # Re-assigning an existing key while iterating is safe (no resize).
            model_dict['blobs'][k] = cs_blob
def remove_momentum(model_dict):
    """Delete SGD momentum blobs ('*_momentum') in place; only the weights
    are needed to initialize fine-tuning."""
    # Snapshot the keys first: deleting entries while iterating the live
    # dict view raises RuntimeError on Python 3.
    for k in list(model_dict['blobs'].keys()):
        if k.endswith('_momentum'):
            del model_dict['blobs'][k]


def load_and_convert_coco_model(args):
    """Load the COCO-pretrained weights pickle named by args and convert
    its class-indexed blobs for Cityscapes fine-tuning.

    Returns the converted model dict (with momentum blobs removed).
    """
    # Detectron .pkl files are binary pickles: must open in 'rb'
    # ('r' fails to decode on Python 3).
    with open(args.coco_model_file_name, 'rb') as f:
        # NOTE(review): pickles written by Python 2 Detectron may also need
        # pickle.load(f, encoding='latin1') under Python 3 -- confirm.
        model_dict = pickle.load(f)
    remove_momentum(model_dict)
    convert_coco_blobs_to_cityscape_blobs(model_dict)
    return model_dict


if __name__ == '__main__':
    args = parse_args()
    print(args)
    assert os.path.exists(args.coco_model_file_name), \
        'Weights file does not exist'
    weights = load_and_convert_coco_model(args)

    # Binary mode ('wb') is required when pickling on Python 3.
    with open(args.out_file_name, 'wb') as f:
        pickle.dump(weights, f, protocol=pickle.HIGHEST_PROTOCOL)
    print('Wrote blobs to {}:'.format(args.out_file_name))
    print(sorted(weights['blobs'].keys()))
def evaluate_masks(
    json_dataset,
    all_boxes,
    all_segms,
    output_dir,
    use_salt=True,
    cleanup=False
):
    """Evaluate instance-segmentation results with the official Cityscapes
    evaluation scripts.

    Writes, under output_dir, one '<image>pred.txt' file per image plus one
    PNG mask per detection in 'results/', in the format the Cityscapes
    evalInstanceLevelSemanticLabeling script expects, then runs that script.

    Args:
        json_dataset: dataset object providing .name, .classes and
            .get_roidb() (same image order as all_boxes/all_segms).
        all_boxes: all_boxes[class_idx][image_idx] is an array of detections
            whose last column is the score (see boxes[k, -1] below).
        all_segms: all_segms[class_idx][image_idx] holds RLE-encoded masks
            decodable by pycocotools.mask.decode.
        output_dir: destination directory for the result files.
        use_salt: if True, append a uuid to res_file to avoid collisions.
        cleanup: accepted for API symmetry with the other evaluators but
            currently unused -- nothing is deleted afterwards.

    Returns:
        None (the Cityscapes script prints/saves its own metrics).
    """
    if cfg.CLUSTER.ON_CLUSTER:
        # On the cluster avoid saving these files in the job directory
        output_dir = '/tmp'
    # NOTE(review): res_file is computed (and salted) but never written to
    # below; it appears to be vestigial from the COCO-style evaluators.
    res_file = os.path.join(
        output_dir, 'segmentations_' + json_dataset.name + '_results')
    if use_salt:
        res_file += '_{}'.format(str(uuid.uuid4()))
    res_file += '.json'

    # Per-detection mask PNGs go here; pngname below is relative to output_dir.
    results_dir = os.path.join(output_dir, 'results')
    if not os.path.exists(results_dir):
        os.mkdir(results_dir)

    os.environ['CITYSCAPES_DATASET'] = DATASETS[json_dataset.name][RAW_DIR]
    os.environ['CITYSCAPES_RESULTS'] = output_dir

    # Load the Cityscapes eval script *after* setting the required env vars,
    # since the script reads their values into global variables (at load time).
    import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \
        as cityscapes_eval

    roidb = json_dataset.get_roidb()
    for i, entry in enumerate(roidb):
        im_name = entry['image']

        basename = os.path.splitext(os.path.basename(im_name))[0]
        txtname = os.path.join(output_dir, basename + 'pred.txt')
        with open(txtname, 'w') as fid_txt:
            if i % 10 == 0:
                logger.info('i: {}: {}'.format(i, basename))
            # Class index 0 is the background; skip it.
            for j in range(1, len(all_segms)):
                clss = json_dataset.classes[j]
                # Translate the dataset class name to the official
                # Cityscapes label id expected by the eval script.
                clss_id = cityscapes_eval.name2label[clss].id
                segms = all_segms[j][i]
                boxes = all_boxes[j][i]
                if segms == []:
                    continue
                # masks: H x W x num_detections binary array.
                masks = mask_util.decode(segms)

                for k in range(boxes.shape[0]):
                    score = boxes[k, -1]
                    mask = masks[:, :, k]
                    pngname = os.path.join(
                        'results',
                        basename + '_' + clss + '_{}.png'.format(k))
                    # write txt
                    fid_txt.write('{} {} {}\n'.format(pngname, clss_id, score))
                    # save mask
                    cv2.imwrite(os.path.join(output_dir, pngname), mask * 255)
    logger.info('Evaluating...')
    cityscapes_eval.main([])
    return None
def get_coco_dataset():
    """A dummy COCO dataset that includes only the 'classes' field."""
    class_names = (
        '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
        'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
        'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
        'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
        'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
        'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
        'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
        'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush',
    )
    # Map contiguous category index (0 = background) -> category name.
    dataset = AttrDict()
    dataset.classes = dict(enumerate(class_names))
    return dataset
#!/usr/bin/env bash
# Build all of the project's custom CUDA ops. For each op: compile its .cu
# kernel with nvcc into a prebuilt object file, then link that object into a
# cffi extension via the op's build.py. All `cd` commands are relative, so
# every section must end exactly one directory above its op folder for the
# following `cd ../../` to land back in lib/ -- keep that bookkeeping in mind
# when re-enabling the commented-out roi_align section below.

CUDA_PATH=/usr/local/cuda/

# Build the Cython/C extensions declared in setup.py in-place.
python setup.py build_ext --inplace
rm -rf build

# Choose cuda arch as you need
CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \
           -gencode arch=compute_35,code=sm_35 \
           -gencode arch=compute_50,code=sm_50 \
           -gencode arch=compute_52,code=sm_52 \
           -gencode arch=compute_60,code=sm_60 \
           -gencode arch=compute_61,code=sm_61 "
#          -gencode arch=compute_70,code=sm_70 "

# compile NMS
cd model/nms/src
echo "Compiling nms kernels by nvcc..."
nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \
     -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH

cd ../
python build.py

# compile roi_pooling
cd ../../
cd model/roi_pooling/src
echo "Compiling roi pooling kernels by nvcc..."
nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \
     -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
cd ../
python build.py

# # compile roi_align
# cd ../../
# cd model/roi_align/src
# echo "Compiling roi align kernels by nvcc..."
# nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \
#      -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
# cd ../
# python build.py

# compile roi_crop
cd ../../
cd model/roi_crop/src
echo "Compiling roi crop kernels by nvcc..."
nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \
     -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
cd ../
python build.py

# compile roi_align (based on Caffe2's implementation)
cd ../../
cd modeling/roi_xfrom/roi_align/src
echo "Compiling roi align kernels by nvcc..."
nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \
     -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
cd ../
python build.py
"""cffi build script for the GPU NMS extension (torch.utils.ffi era)."""
from __future__ import print_function
import os
import torch
from torch.utils.ffi import create_extension

#this_file = os.path.dirname(__file__)

# CUDA sources are only compiled in when a GPU build of torch is available;
# on a CPU-only install the extension is built empty.
sources = []
headers = []
defines = []
with_cuda = False

if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/nms_cuda.c']
    headers += ['src/nms_cuda.h']
    defines += [('WITH_CUDA', None)]
    with_cuda = True

this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)
# nms_cuda_kernel.cu.o is a prebuilt object produced by make.sh (nvcc) before
# this script runs; it is linked into the extension as-is.
extra_objects = ['src/nms_cuda_kernel.cu.o']
extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
print(extra_objects)

ffi = create_extension(
    '_ext.nms',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects
)

if __name__ == '__main__':
    ffi.build()
def nms(dets, thresh, force_cpu=False):
    """Dispatch to either CPU or GPU NMS implementations.

    dets: detections, one row per box with the score in the last column.
    thresh: overlap threshold above which boxes are suppressed.
    force_cpu: accepted for interface compatibility; only the pytorch GPU
    path is currently wired up.
    """
    # Nothing to suppress -- mirror the empty-result convention.
    if len(dets) == 0:
        return []
    # ---numpy version---
    # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
    # ---pytorch version---
    return nms_gpu(dets, thresh)
float nms_overlap_thresh) { 10 | 11 | nms_cuda_compute(THCudaIntTensor_data(state, keep_out), 12 | THCudaIntTensor_data(state, num_out), 13 | THCudaTensor_data(state, boxes_host), 14 | THCudaTensor_size(state, boxes_host, 0), 15 | THCudaTensor_size(state, boxes_host, 1), 16 | nms_overlap_thresh); 17 | 18 | return 1; 19 | } 20 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out, 2 | // THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh); 3 | 4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 5 | THCudaIntTensor *num_out, float nms_overlap_thresh); 6 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 6 | int boxes_dim, float nms_overlap_thresh); 7 | 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | -------------------------------------------------------------------------------- /lib/model/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- 
"""Auto-generated cffi shim: re-exports the compiled _roi_align library."""
from torch.utils.ffi import _wrap_function
from ._roi_align import lib as _lib, ffi as _ffi

__all__ = []
def _import_symbols(locals):
    # Re-export every symbol of the compiled cffi extension from this module.
    # NOTE: `locals` is actually this module's globals() dict (passed in the
    # call below), so assigning into it defines module-level names; the
    # parameter name shadows the builtin.
    for symbol in dir(_lib):
        fn = getattr(_lib, symbol)
        if callable(fn):
            # C functions are wrapped so they accept torch tensors directly.
            locals[symbol] = _wrap_function(fn, _ffi)
        else:
            # Non-callable exports (constants) pass through unchanged.
            locals[symbol] = fn
        __all__.append(symbol)

_import_symbols(locals())
# TODO use save_for_backward instead
class RoIAlignFunction(Function):
    """Legacy (instance-style) autograd Function wrapping the CUDA RoIAlign op.

    Pools each ROI from the feature map into a fixed
    (aligned_height, aligned_width) output. CPU execution is not implemented.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        # Output resolution per ROI, and the factor mapping ROI coordinates
        # (input-image scale) onto the feature-map scale.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)
        # Stashed by forward() for backward() -- pre-save_for_backward style,
        # which is why instances must not be reused across calls.
        self.rois = None
        self.feature_size = None

    def forward(self, features, rois):
        # features: (batch, channels, height, width) feature map.
        # rois: one row per ROI; the CUDA side requires 5 values per row --
        # presumably (batch_idx, x1, y1, x2, y2), confirm against the kernel.
        self.rois = rois
        self.feature_size = features.size()

        batch_size, num_channels, data_height, data_width = features.size()
        num_rois = rois.size(0)

        output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_()
        if features.is_cuda:
            # The CUDA op writes its result into the pre-allocated `output`.
            roi_align.roi_align_forward_cuda(self.aligned_height,
                                             self.aligned_width,
                                             self.spatial_scale, features,
                                             rois, output)
        else:
            raise NotImplementedError

        return output

    def backward(self, grad_output):
        # Backward requires a prior forward() (stashed state) and CUDA tensors.
        assert(self.feature_size is not None and grad_output.is_cuda)

        batch_size, num_channels, data_height, data_width = self.feature_size

        # Gradient w.r.t. the feature map, same shape as the forward input.
        grad_input = self.rois.new(batch_size, num_channels, data_height,
                                   data_width).zero_()
        roi_align.roi_align_backward_cuda(self.aligned_height,
                                          self.aligned_width,
                                          self.spatial_scale, grad_output,
                                          self.rois, grad_input)

        # print grad_input

        # No gradient for the ROIs themselves.
        return grad_input, None
class RoIAlignAvg(Module):
    """RoIAlign followed by a 2x2, stride-1 average pool.

    Aligns at one extra row and column, then average-pools, so the final
    output is exactly (aligned_height, aligned_width) per ROI.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignAvg, self).__init__()

        # Target pooled resolution and ROI-to-feature-map coordinate scale.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        # Sample at (h+1, w+1) so the 2x2/stride-1 average pool below brings
        # the result back down to (aligned_height, aligned_width).
        x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1,
                             self.spatial_scale)(features, rois)
        return avg_pool2d(x, kernel_size=2, stride=1)
/* RoIAlign forward pass entry point (CUDA).
 *
 * aligned_height/aligned_width: pooled output resolution per ROI.
 * spatial_scale: factor mapping ROI coordinates onto the feature-map scale.
 * features: (batch, channels, height, width) input feature map on the GPU.
 * rois: (num_rois, 5) ROI tensor; rows are presumably
 *       (batch_idx, x1, y1, x2, y2) -- confirm against the kernel.
 * output: pre-allocated result tensor the kernel writes into.
 *
 * Returns 1 on success; returns 0 silently (no error reported) when the ROI
 * tensor does not have exactly 5 columns.
 */
int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                           THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
{
    // Grab the input tensor
    float * data_flat = THCudaTensor_data(state, features);
    float * rois_flat = THCudaTensor_data(state, rois);

    float * output_flat = THCudaTensor_data(state, output);

    // Number of ROIs
    int num_rois = THCudaTensor_size(state, rois, 0);
    int size_rois = THCudaTensor_size(state, rois, 1);
    if (size_rois != 5)
    {
        return 0;
    }

    // data height
    int data_height = THCudaTensor_size(state, features, 2);
    // data width
    int data_width = THCudaTensor_size(state, features, 3);
    // Number of channels
    int num_channels = THCudaTensor_size(state, features, 1);

    // Launch on the current torch CUDA stream.
    cudaStream_t stream = THCState_getCurrentStream(state);

    ROIAlignForwardLaucher(
        data_flat, spatial_scale, num_rois, data_height,
        data_width, num_channels, aligned_height,
        aligned_width, rois_flat,
        output_flat, stream);

    return 1;
}
float spatial_scale, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width, num_channels, aligned_height, 72 | aligned_width, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define 
_ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /lib/model/roi_crop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_crop/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_crop/_ext/__init__.py 
"""Auto-generated cffi shim: re-exports the compiled _crop_resize library."""
from torch.utils.ffi import _wrap_function
from ._crop_resize import lib as _lib, ffi as _ffi

__all__ = []
def _import_symbols(locals):
    """Re-export every symbol of the compiled cffi extension on this module.

    `locals` is this module's globals() dict (passed in the call below), so
    assigning into it defines module-level names. Callable symbols are
    wrapped with _wrap_function so they accept torch tensors; non-callable
    exports (constants) pass through unchanged. The callable() guard matches
    the sibling _ext packages (nms, roi_align, roi_crop); the previous
    unguarded version also handed constants to _wrap_function, which breaks
    if the compiled library exports any non-function symbol.
    """
    for symbol in dir(_lib):
        fn = getattr(_lib, symbol)
        if callable(fn):
            locals[symbol] = _wrap_function(fn, _ffi)
        else:
            locals[symbol] = fn
        __all__.append(symbol)

_import_symbols(locals())
| if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_crop_cuda.c'] 16 | headers += ['src/roi_crop_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_crop', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_crop/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/crop_resize.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | from cffi import FFI 6 | ffi = FFI() 7 | 8 | class RoICropFunction(Function): 9 | def forward(self, input1, input2): 10 | self.input1 = input1 11 | self.input2 = input2 12 | self.device_c = ffi.new("int *") 13 | output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]) 14 | #print('decice %d' % torch.cuda.current_device()) 15 | if input1.is_cuda: 16 | self.device = torch.cuda.current_device() 17 | else: 18 | self.device = -1 19 | self.device_c[0] = self.device 20 | if not input1.is_cuda: 21 | roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output) 22 | else: 23 
class AffineGridGenFunction(Function):
    """Legacy autograd Function that turns batched 2x3 affine matrices into
    dense (H, W, 2) sampling grids (normalized coordinates in [-1, 1)).

    A fixed base grid of homogeneous coordinates is built once in __init__;
    forward() multiplies it by each affine matrix.
    """

    def __init__(self, height, width, lr=1):
        super(AffineGridGenFunction, self).__init__()
        self.lr = lr
        self.height, self.width = height, width
        # Base grid, shape (H, W, 3):
        #   channel 0 = row coordinate in [-1, 1), constant along columns
        #   channel 1 = col coordinate in [-1, 1), constant along rows
        #   channel 2 = 1 (homogeneous coordinate)
        self.grid = np.zeros([self.height, self.width, 3], dtype=np.float32)
        self.grid[:, :, 0] = np.expand_dims(
            np.repeat(np.expand_dims(np.arange(-1, 1, 2.0 / self.height), 0),
                      repeats=self.width, axis=0).T, 0)
        self.grid[:, :, 1] = np.expand_dims(
            np.repeat(np.expand_dims(np.arange(-1, 1, 2.0 / self.width), 0),
                      repeats=self.height, axis=0), 0)
        self.grid[:, :, 2] = np.ones([self.height, self.width])
        self.grid = torch.from_numpy(self.grid.astype(np.float32))

    def forward(self, input1):
        # input1: (batch, 2, 3) affine matrices.
        self.input1 = input1
        # Replicate the base grid per batch element; kept on self for backward.
        self.batchgrid = input1.new(
            torch.Size([input1.size(0)]) + self.grid.size()).zero_()
        for i in range(input1.size(0)):
            # Bug fix: the original called self.grid.astype(self.batchgrid[i]),
            # but self.grid is a torch.Tensor (from_numpy above) and has no
            # numpy-style astype() -- it raised AttributeError. A plain slice
            # assignment copies (and dtype-converts) the base grid in place.
            self.batchgrid[i] = self.grid

        # (B, H*W, 3) @ (B, 3, 2) -> (B, H*W, 2) -> (B, H, W, 2).
        # (The original recomputed this identical full-batch bmm once per
        # batch element; once is enough.)
        output = torch.bmm(
            self.batchgrid.view(-1, self.height * self.width, 3),
            torch.transpose(input1, 1, 2)
        ).view(-1, self.height, self.width, 2)

        return output

    def backward(self, grad_output):
        # Gradient w.r.t. the affine matrices: (B, 2, H*W) @ (B, H*W, 3).
        grad_input1 = self.input1.new(self.input1.size()).zero_()
        grad_input1 = torch.baddbmm(
            grad_input1,
            torch.transpose(grad_output.view(-1, self.height * self.width, 2),
                            1, 2),
            self.batchgrid.view(-1, self.height * self.width, 3))
        return grad_input1
class _RoICrop(Module):
    """nn.Module wrapper around RoICropFunction (bilinear ROI sampling).

    NOTE(review): the ``layout`` argument is accepted but never stored or
    used -- every instance behaves identically regardless of its value.
    """
    def __init__(self, layout = 'BHWD'):
        super(_RoICrop, self).__init__()
    def forward(self, input1, input2):
        # input1: feature maps; input2: sampling grid. Layouts are presumably
        # BHWD / BHW(YX) as stated in the C headers -- confirm at call sites.
        return RoICropFunction()(input1, input2)
BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 4 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 5 | 6 | 7 | 8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 9 | 10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 11 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 12 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda.h: -------------------------------------------------------------------------------- 1 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 2 | // we assume BHWD format in inputImages 3 | // we assume BHW(YX) format on grids 4 | 5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output); 6 | 7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput); 9 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | 6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc, 7 | /*output->size[2]*/int ow, 8 | /*output->size[1]*/int oh, 9 | /*output->size[0]*/int ob, 10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 11 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int 
gsh, int gsw, 16 | /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw, 17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 18 | 19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc, 20 | /*gradOutput->size[2]*/int gow, 21 | /*gradOutput->size[1]*/int goh, 22 | /*gradOutput->size[0]*/int gob, 23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 26 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 27 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 28 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 29 | /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw, 30 | /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw, 31 | /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw, 32 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 33 | 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_pooling/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/_ext/roi_pooling/__init__.py: 
__all__ = []


def _import_symbols(locals):
    """Mirror every public symbol of the compiled FFI library here.

    Callables are wrapped with ``_wrap_function`` so they accept torch
    tensors; plain attributes are copied through unchanged. Each name is
    also recorded in ``__all__`` so ``from ... import *`` re-exports it.
    """
    for symbol in dir(_lib):
        attr = getattr(_lib, symbol)
        wrapped = _wrap_function(attr, _ffi) if callable(attr) else attr
        locals[symbol] = wrapped
        __all__.append(symbol)


_import_symbols(locals())
class RoIPoolFunction(Function):
    """Max ROI pooling (Fast R-CNN style) backed by C/CUDA extensions.

    Legacy (instance-style) autograd Function: pooling geometry is passed
    to ``__init__`` and the forward inputs are cached on ``ctx``.
    """

    def __init__(ctx, pooled_height, pooled_width, spatial_scale):
        # Pool geometry; feature_size is filled in lazily by forward().
        ctx.pooled_width = pooled_width
        ctx.pooled_height = pooled_height
        ctx.spatial_scale = spatial_scale
        ctx.feature_size = None

    def forward(ctx, features, rois):
        ctx.feature_size = features.size()
        num_channels = ctx.feature_size[1]
        num_rois = rois.size(0)
        pooled_shape = (num_rois, num_channels, ctx.pooled_height, ctx.pooled_width)
        output = features.new(*pooled_shape).zero_()
        # argmax indices are consumed by the CUDA backward kernel.
        ctx.argmax = features.new(*pooled_shape).zero_().int()
        ctx.rois = rois
        if features.is_cuda:
            roi_pooling.roi_pooling_forward_cuda(
                ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                features, rois, output, ctx.argmax)
        else:
            # The CPU kernel expects NHWC layout.
            _features = features.permute(0, 2, 3, 1)
            roi_pooling.roi_pooling_forward(
                ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                _features, rois, output)

        return output

    def backward(ctx, grad_output):
        # Only the CUDA backward kernel exists.
        assert(ctx.feature_size is not None and grad_output.is_cuda)
        batch_size, num_channels, data_height, data_width = ctx.feature_size
        grad_input = grad_output.new(
            batch_size, num_channels, data_height, data_width).zero_()
        roi_pooling.roi_pooling_backward_cuda(
            ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
            grad_output, ctx.rois, grad_input, ctx.argmax)

        return grad_input, None
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import RoIPoolFunction 3 | 4 | 5 | class _RoIPooling(Module): 6 | def __init__(self, pooled_height, pooled_width, spatial_scale): 7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 15 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 
27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Set all element of the output tensor to -inf. 30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += 
output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output += pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 
21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = THCudaTensor_size(state, features, 0); 27 | // if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, 
data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /lib/model/utils/.gitignore: 
-------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/utils/__init__.py -------------------------------------------------------------------------------- /lib/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/modeling/__init__.py -------------------------------------------------------------------------------- /lib/modeling/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def generate_anchors(
    stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)
):
    """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    are centered on stride / 2, have (approximate) sqrt areas of the specified
    sizes, and aspect ratios as given.
    """
    # NOTE: this used np.float, which was deprecated in NumPy 1.20 and
    # removed in 1.24 (AttributeError). The builtin float is the documented
    # drop-in replacement and yields the same float64 dtype.
    return _generate_anchors(
        stride,
        np.array(sizes, dtype=float) / stride,
        np.array(aspect_ratios, dtype=float)
    )


def _generate_anchors(base_size, scales, aspect_ratios):
    """Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.
    """
    anchor = np.array([1, 1, base_size, base_size], dtype=float) - 1
    anchors = _ratio_enum(anchor, aspect_ratios)
    anchors = np.vstack(
        [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])]
    )
    return anchors


def _whctrs(anchor):
    """Return width, height, x center, and y center for an anchor (window)."""
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).
    """
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack(
        (
            x_ctr - 0.5 * (ws - 1),
            y_ctr - 0.5 * (hs - 1),
            x_ctr + 0.5 * (ws - 1),
            y_ctr + 0.5 * (hs - 1)
        )
    )
    return anchors


def _ratio_enum(anchor, ratios):
    """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    # Preserve area while changing aspect ratio; round to integral sizes
    # exactly as the original Faster R-CNN implementation does.
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


def _scale_enum(anchor, scales):
    """Enumerate a set of anchors for each scale wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors
class GenerateProposalLabelsOp(nn.Module):
    """Labels RPN proposals for joint RPN + Fast/Mask R-CNN training."""

    def __init__(self):
        super().__init__()

    def forward(self, rpn_rois, roidb, im_info):
        """Op for generating training labels for RPN proposals. This is used
        when training RPN jointly with Fast/Mask R-CNN (as in end-to-end
        Faster R-CNN training).

        blobs_in:
            - 'rpn_rois': 2D tensor of RPN proposals output by
              GenerateProposals
            - 'roidb': roidb entries that will be labeled
            - 'im_info': See GenerateProposals doc.

        blobs_out:
            - (variable set of blobs): whatever blobs the data loader says
              are required for training the model.
        """
        # Column 2 of im_info holds the per-image rescale factor.
        scales = im_info.data.numpy()[:, 2]

        blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
        # For historical consistency with the original Faster R-CNN
        # implementation we are *not* filtering crowd proposals.
        # This choice should be investigated in the future (it likely does
        # not matter).
        # Note: crowd_thresh=0 will ignore _filter_crowd_proposals
        json_dataset.add_proposals(roidb, rpn_rois, scales, crowd_thresh=0)
        blobs = dict((name, []) for name in blob_names)
        roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, scales, roidb)

        return blobs
# TODO use save_for_backward instead
class RoIAlignFunction(Function):
    """RoIAlign (Mask R-CNN) backed by the CUDA extension; CUDA only."""

    def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio):
        # Normalise argument types once; forward/backward reuse these.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)
        # Filled in by forward() for use in backward().
        self.rois = None
        self.feature_size = None

    def forward(self, features, rois):
        self.rois = rois
        self.feature_size = features.size()

        n_rois = rois.size(0)
        n_channels = features.size(1)
        output = features.new(
            n_rois, n_channels, self.aligned_height, self.aligned_width).zero_()

        if features.is_cuda:
            roi_align.roi_align_forward_cuda(
                self.aligned_height, self.aligned_width,
                self.spatial_scale, self.sampling_ratio,
                features, rois, output)
        else:
            # No CPU kernel was ever built for this extension.
            raise NotImplementedError

        return output

    def backward(self, grad_output):
        assert(self.feature_size is not None and grad_output.is_cuda)

        batch_size, n_channels, height, width = self.feature_size
        grad_input = self.rois.new(batch_size, n_channels, height, width).zero_()
        roi_align.roi_align_backward_cuda(
            self.aligned_height, self.aligned_width,
            self.spatial_scale, self.sampling_ratio,
            grad_output, self.rois, grad_input)

        return grad_input, None
class RoIAlign(Module):
    """Plain RoIAlign: pools each ROI to aligned_height x aligned_width."""

    def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio):
        super(RoIAlign, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)

    def forward(self, features, rois):
        fn = RoIAlignFunction(self.aligned_height, self.aligned_width,
                              self.spatial_scale, self.sampling_ratio)
        return fn(features, rois)


class RoIAlignAvg(Module):
    """RoIAlign to an (h+1, w+1) grid followed by 2x2 stride-1 avg pooling."""

    def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio):
        super(RoIAlignAvg, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)

    def forward(self, features, rois):
        fn = RoIAlignFunction(self.aligned_height + 1, self.aligned_width + 1,
                              self.spatial_scale, self.sampling_ratio)
        return avg_pool2d(fn(features, rois), kernel_size=2, stride=1)
class RoIAlignMax(Module):
    """RoIAlign onto a (h+1, w+1) grid followed by a 2x2 max pool."""

    def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio):
        super(RoIAlignMax, self).__init__()

        self.aligned_height = int(aligned_height)
        self.aligned_width = int(aligned_width)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)

    def forward(self, features, rois):
        # Pool one extra row/column, then max-pool down to the target size.
        align = RoIAlignFunction(self.aligned_height + 1, self.aligned_width + 1,
                                 self.spatial_scale, self.sampling_ratio)
        return max_pool2d(align(features, rois), kernel_size=2, stride=1)
/* Backward pass of RoIAlign on the GPU.
 *
 * Scatters `top_grad` (d_loss/d_output) into `bottom_grad`
 * (d_loss/d_features) for the given `rois` via the CUDA launcher.
 *
 * Returns 1 on success, 0 if `rois` does not have 5 columns per row.
 */
int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio,
                        THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
{
    // Grab the input tensor
    float * top_grad_flat = THCudaTensor_data(state, top_grad);
    float * rois_flat = THCudaTensor_data(state, rois);

    float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);

    // Number of ROIs
    int num_rois = THCudaTensor_size(state, rois, 0);
    int size_rois = THCudaTensor_size(state, rois, 1);
    if (size_rois != 5)
    {
        // Caller must treat 0 as failure; presumably each roi row is
        // [batch_idx, x1, y1, x2, y2] -- confirm against the kernel.
        return 0;
    }

    // batch size
    int batch_size = THCudaTensor_size(state, bottom_grad, 0);
    // data height
    int data_height = THCudaTensor_size(state, bottom_grad, 2);
    // data width
    int data_width = THCudaTensor_size(state, bottom_grad, 3);
    // Number of channels
    int num_channels = THCudaTensor_size(state, bottom_grad, 1);

    // Launch on the current THC stream so ordering with other torch ops holds.
    cudaStream_t stream = THCState_getCurrentStream(state);
    ROIAlignBackwardLaucher(
        top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
        data_width, num_channels, aligned_height,
        aligned_width, sampling_ratio, rois_flat,
        bottom_grad_flat, stream);

    return 1;
}
#ifndef _ROI_ALIGN_KERNEL
#define _ROI_ALIGN_KERNEL

#ifdef __cplusplus
extern "C" {
#endif

/* CUDA kernel: pool each roi from bottom_data into top_data. */
__global__ void ROIAlignForward(const int nthreads, const float* bottom_data,
                    const float spatial_scale, const int height, const int width,
                    const int channels, const int aligned_height, const int aligned_width, const int sampling_ratio,
                    const float* bottom_rois, float* top_data);

/* Host-side launcher for ROIAlignForward.
 * (Note: the "Laucher" misspelling is part of the existing API.) */
int ROIAlignForwardLaucher(
    const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
    const int width, const int channels, const int aligned_height,
    const int aligned_width, const int sampling_ratio, const float* bottom_rois,
    float* top_data, cudaStream_t stream);

/* CUDA kernel: scatter top_diff gradients back into bottom_diff. */
__global__ void ROIAlignBackward(const int nthreads, const float* top_diff,
                    const float spatial_scale, const int height, const int width,
                    const int channels, const int aligned_height, const int aligned_width, const int sampling_ratio,
                    float* bottom_diff, const float* bottom_rois);

/* Host-side launcher for ROIAlignBackward. */
int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
    const int height, const int width, const int channels, const int aligned_height,
    const int aligned_width, const int sampling_ratio, const float* bottom_rois,
    float* bottom_diff, cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
def group_norm(x, num_groups, weight=None, bias=None, eps=1e-5):
    """Group-normalize `x` over `num_groups` channel groups.

    Args:
        x: tensor of shape (N, C, ...); C must be divisible by num_groups.
        num_groups: number of channel groups to normalize over.
        weight, bias: optional per-channel affine parameters of shape (C,).
        eps: small constant added to the variance for numerical stability.

    Returns:
        Tensor of the same shape as `x`.
    """
    shape = x.shape
    batch, channels = shape[0], shape[1]
    assert channels % num_groups == 0, "input channel dimension must divisible by number of groups"

    # Normalize each (sample, group) slice to zero mean / unit variance.
    grouped = x.view(batch, num_groups, -1)
    mu = grouped.mean(-1, keepdim=True)
    # NOTE(review): torch's default here is the *unbiased* variance, whereas
    # nn.GroupNorm uses the biased one — confirm this is intended.
    sigma2 = grouped.var(-1, keepdim=True)
    normed = ((grouped - mu) / (sigma2 + eps).sqrt()).view(shape)

    if weight is None:
        return normed
    # Broadcast the per-channel affine over all trailing spatial dims.
    broadcast = (1, -1) + (1,) * (len(shape) - 2)
    return normed * weight.view(broadcast) + bias.view(broadcast)
class AffineChannel2d(nn.Module):
    """Channel-wise affine transform: y[:, c] = x[:, c] * weight[c] + bias[c]."""

    def __init__(self, num_features):
        super().__init__()
        self.num_features = num_features
        # Learnable per-channel scale and shift; scale starts uniform in
        # [0, 1), shift starts at zero.
        self.weight = nn.Parameter(torch.Tensor(num_features))
        self.bias = nn.Parameter(torch.Tensor(num_features))
        self.weight.data.uniform_()
        self.bias.data.zero_()

    def forward(self, x):
        # Reshape to (1, C, 1, 1) so the affine broadcasts over N, H, W.
        broadcast = (1, self.num_features, 1, 1)
        scale = self.weight.view(broadcast)
        shift = self.bias.view(broadcast)
        return x * scale + shift
class BilinearInterpolation2d(nn.Module):
    """Bilinear interpolation in space of scale.

    Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s:= up_scale

    Adapted from the CVPR'15 FCN code.
    See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py
    """

    def __init__(self, in_channels, out_channels, up_scale):
        super().__init__()
        assert in_channels == out_channels
        assert up_scale % 2 == 0, 'Scale should be even'
        # Fix: attribute was previously misspelled as `in_channes`.
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.up_scale = int(up_scale)
        self.padding = up_scale // 2

        def upsample_filt(size):
            """Return a (size, size) 2D bilinear interpolation kernel."""
            factor = (size + 1) // 2
            if size % 2 == 1:
                center = factor - 1
            else:
                center = factor - 0.5
            og = np.ogrid[:size, :size]
            return ((1 - abs(og[0] - center) / factor) *
                    (1 - abs(og[1] - center) / factor))

        kernel_size = up_scale * 2
        bil_filt = upsample_filt(kernel_size)

        # Diagonal kernel: channel i is upsampled from channel i only.
        kernel = np.zeros(
            (in_channels, out_channels, kernel_size, kernel_size), dtype=np.float32
        )
        kernel[range(in_channels), range(out_channels), :, :] = bil_filt

        # With k = 2s, stride = s, padding = s // 2 the output side is exactly
        # s times the input side: (H - 1) * s - 2 * (s // 2) + 2 * s = s * H.
        self.upconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size,
                                         stride=self.up_scale, padding=self.padding)

        # Freeze the weights: this layer is fixed interpolation, not learned.
        self.upconv.weight.data.copy_(torch.from_numpy(kernel))
        self.upconv.bias.data.fill_(0)
        self.upconv.weight.requires_grad = False
        self.upconv.bias.requires_grad = False

    def forward(self, x):
        """Upsample x of shape (N, K, H, W) to (N, K, s*H, s*W)."""
        return self.upconv(x)
class Broadcast(Function):
    """Autograd-aware broadcast of CUDA tensors to multiple GPUs.

    Backward reduce-adds the per-GPU gradients back onto the source device.
    """

    @staticmethod
    def forward(ctx, target_gpus, *inputs):
        if not all(input.is_cuda for input in inputs):
            raise TypeError('Broadcast function not implemented for CPU tensors')
        ctx.target_gpus = target_gpus
        if len(inputs) == 0:
            return tuple()
        ctx.num_inputs = len(inputs)
        ctx.input_device = inputs[0].get_device()
        # Coalesced copy: one transfer per device instead of one per tensor.
        outputs = comm.broadcast_coalesced(inputs, ctx.target_gpus)
        # Mark the copies of inputs that do not require grad so autograd
        # skips them during backward.
        non_differentiables = []
        for idx, input_requires_grad in enumerate(ctx.needs_input_grad[1:]):
            if not input_requires_grad:
                for output in outputs:
                    non_differentiables.append(output[idx])
        ctx.mark_non_differentiable(*non_differentiables)
        # Flatten the [per-device][per-input] nesting into a single tuple.
        return tuple([t for tensors in outputs for t in tensors])

    @staticmethod
    def backward(ctx, *grad_outputs):
        # Leading None matches the non-tensor `target_gpus` argument.
        return (None,) + ReduceAddCoalesced.apply(ctx.input_device, ctx.num_inputs, *grad_outputs)
class Gather(Function):
    """Concatenate per-GPU tensors onto one device along `dim`."""

    @staticmethod
    def forward(ctx, target_device, dim, *inputs):
        assert all(map(lambda i: i.is_cuda, inputs))
        ctx.target_device = target_device
        ctx.dim = dim
        # Remember where each input lived and its extent along `dim` so
        # backward can split the gradient back apart.
        ctx.input_gpus = tuple(map(lambda i: i.get_device(), inputs))
        ctx.input_sizes = tuple(map(lambda i: i.size(ctx.dim), inputs))
        return comm.gather(inputs, ctx.dim, ctx.target_device)

    @staticmethod
    def backward(ctx, grad_output):
        # Two Nones for (target_device, dim); Scatter undoes the gather.
        return (None, None) + Scatter.apply(ctx.input_gpus, ctx.input_sizes, ctx.dim, grad_output)


class Scatter(Function):
    """Split a tensor along `dim` into chunks placed on `target_gpus`."""

    @staticmethod
    def forward(ctx, target_gpus, chunk_sizes, dim, input):
        ctx.target_gpus = target_gpus
        ctx.chunk_sizes = chunk_sizes
        ctx.dim = dim
        # -1 encodes "input is on the CPU".
        ctx.input_device = input.get_device() if input.is_cuda else -1
        streams = None
        if ctx.input_device == -1:
            # Perform CPU to GPU copies in a background stream
            streams = [_get_stream(device) for device in ctx.target_gpus]
        outputs = comm.scatter(input, ctx.target_gpus, ctx.chunk_sizes, ctx.dim, streams)
        # Synchronize with the copy stream
        if streams is not None:
            for i, output in enumerate(outputs):
                with torch.cuda.device(ctx.target_gpus[i]):
                    main_stream = torch.cuda.current_stream()
                    main_stream.wait_stream(streams[i])
                    output.record_stream(main_stream)
        return outputs

    @staticmethod
    def backward(ctx, *grad_output):
        # Three Nones for (target_gpus, chunk_sizes, dim).
        return None, None, None, Gather.apply(ctx.input_device, ctx.dim, *grad_output)
def get_a_var(obj):
    """Return the first Variable found inside `obj`, searching recursively
    through lists, tuples and dict items; None if there is none."""
    if isinstance(obj, Variable):
        return obj

    if isinstance(obj, (list, tuple)):
        for candidate in map(get_a_var, obj):
            if isinstance(candidate, Variable):
                return candidate
    if isinstance(obj, dict):
        for candidate in map(get_a_var, obj.items()):
            if isinstance(candidate, Variable):
                return candidate
    return None
def replicate(network, devices):
    """Create one replica of `network` per device in `devices`.

    Parameters and buffers are broadcast to each device; module objects are
    shallow-copied and rewired so every replica owns its own container dicts.
    Returns a list with the root module of each replica.
    """
    from ._functions import Broadcast

    devices = tuple(devices)
    num_replicas = len(devices)

    # Broadcast all parameters in one coalesced, autograd-tracked transfer.
    params = list(network.parameters())
    param_indices = {param: idx for idx, param in enumerate(params)}
    param_copies = Broadcast.apply(devices, *params)
    if len(params) > 0:
        # Flat result -> one list of parameter copies per replica.
        param_copies = [param_copies[i:i + len(params)]
                        for i in range(0, len(param_copies), len(params))]

    # Buffers are broadcast without autograd tracking.
    # NOTE(review): `_all_buffers()` is a private API of older torch
    # releases — confirm it exists on the targeted version.
    buffers = list(network._all_buffers())
    buffer_indices = {buf: idx for idx, buf in enumerate(buffers)}
    buffer_copies = comm.broadcast_coalesced(buffers, devices)

    modules = list(network.modules())
    module_copies = [[] for device in devices]
    module_indices = {}

    # Shallow-copy every module once per replica; the container dicts are
    # copied so the rewiring below does not mutate the original network.
    for i, module in enumerate(modules):
        module_indices[module] = i
        for j in range(num_replicas):
            replica = module.__new__(type(module))
            replica.__dict__ = module.__dict__.copy()
            replica._parameters = replica._parameters.copy()
            replica._buffers = replica._buffers.copy()
            replica._modules = replica._modules.copy()
            module_copies[j].append(replica)

    # Rewire each replica's children, parameters and buffers to point at the
    # copies that belong to its own device.
    for i, module in enumerate(modules):
        for key, child in module._modules.items():
            if child is None:
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._modules[key] = None
            else:
                module_idx = module_indices[child]
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._modules[key] = module_copies[j][module_idx]
        for key, param in module._parameters.items():
            if param is None:
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._parameters[key] = None
            else:
                param_idx = param_indices[param]
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._parameters[key] = param_copies[j][param_idx]
        for key, buf in module._buffers.items():
            if buf is None:
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._buffers[key] = None
            else:
                buffer_idx = buffer_indices[buf]
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._buffers[key] = buffer_copies[j][buffer_idx]

    # Element 0 of each per-replica list is the root module.
    return [module_copies[j][0] for j in range(num_replicas)]
def scatter_kwargs(inputs, kwargs, target_gpus, dim=0):
    r"""Scatter with support for kwargs dictionary"""
    args_per_gpu = scatter(inputs, target_gpus, dim) if inputs else []
    kwargs_per_gpu = scatter(kwargs, target_gpus, dim) if kwargs else []
    # Pad the shorter list so every device gets an (args, kwargs) pair.
    while len(args_per_gpu) < len(kwargs_per_gpu):
        args_per_gpu.append(())
    while len(kwargs_per_gpu) < len(args_per_gpu):
        kwargs_per_gpu.append({})
    return tuple(args_per_gpu), tuple(kwargs_per_gpu)
58 | """ 59 | error_msg = "outputs must contain tensors, numbers, dicts or lists; found {}" 60 | 61 | def gather_map(outputs): 62 | out = outputs[0] 63 | elem_type = type(out) 64 | if isinstance(out, Variable): 65 | return Gather.apply(target_device, dim, *outputs) 66 | if out is None: 67 | return None 68 | if isinstance(out, collections.Sequence): 69 | return type(out)(map(gather_map, zip(*outputs))) 70 | elif isinstance(out, collections.Mapping): 71 | return {key: gather_map([d[key] for d in outputs]) for key in out} 72 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ 73 | and elem_type.__name__ != 'string_': 74 | elem = out 75 | if elem_type.__name__ == 'ndarray': 76 | # array of string classes and object 77 | if re.search('[SaUO]', elem.dtype.str) is not None: 78 | raise TypeError(error_msg.format(elem.dtype)) 79 | 80 | return Variable(torch.from_numpy(np.concatenate(outputs, dim))) 81 | if elem.shape == (): # scalars 82 | py_type = float if elem.dtype.name.startswith('float') else int 83 | return Variable(numpy_type_map[elem.dtype.name](list(map(py_type, outputs)))) 84 | elif isinstance(out, int_classes): 85 | return Variable(torch.LongTensor(outputs)) 86 | elif isinstance(out, float): 87 | return Variable(torch.DoubleTensor(outputs)) 88 | elif isinstance(out, string_classes): 89 | return outputs 90 | 91 | raise TypeError((error_msg.format(elem_type))) 92 | 93 | # Recursive function calls like this create reference cycles. 94 | # Setting the function to None clears the refcycle. 
95 | try: 96 | return gather_map(outputs) 97 | finally: 98 | gather_map = None 99 | -------------------------------------------------------------------------------- /lib/roi_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/roi_data/__init__.py -------------------------------------------------------------------------------- /lib/roi_data/data_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Common utility functions for RPN and RetinaNet minibtach blobs preparation. 17 | """ 18 | 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | from __future__ import unicode_literals 23 | 24 | from collections import namedtuple 25 | import logging 26 | import numpy as np 27 | import threading 28 | 29 | from core.config import cfg 30 | from modeling.generate_anchors import generate_anchors 31 | import utils.boxes as box_utils 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | # octave and aspect fields are only used on RetinaNet. 
Octave corresponds to the 37 | # scale of the anchor and aspect denotes which aspect ratio is used in the range 38 | # of aspect ratios 39 | FieldOfAnchors = namedtuple( 40 | 'FieldOfAnchors', [ 41 | 'field_of_anchors', 'num_cell_anchors', 'stride', 'field_size', 42 | 'octave', 'aspect' 43 | ] 44 | ) 45 | 46 | # Cache for memoizing _get_field_of_anchors 47 | _threadlocal_foa = threading.local() 48 | 49 | 50 | def get_field_of_anchors( 51 | stride, anchor_sizes, anchor_aspect_ratios, octave=None, aspect=None 52 | ): 53 | global _threadlocal_foa 54 | if not hasattr(_threadlocal_foa, 'cache'): 55 | _threadlocal_foa.cache = {} 56 | 57 | cache_key = str(stride) + str(anchor_sizes) + str(anchor_aspect_ratios) 58 | if cache_key in _threadlocal_foa.cache: 59 | return _threadlocal_foa.cache[cache_key] 60 | 61 | # Anchors at a single feature cell 62 | cell_anchors = generate_anchors( 63 | stride=stride, sizes=anchor_sizes, aspect_ratios=anchor_aspect_ratios 64 | ) 65 | num_cell_anchors = cell_anchors.shape[0] 66 | 67 | # Generate canonical proposals from shifted anchors 68 | # Enumerate all shifted positions on the (H, W) grid 69 | fpn_max_size = cfg.FPN.COARSEST_STRIDE * np.ceil( 70 | cfg.TRAIN.MAX_SIZE / float(cfg.FPN.COARSEST_STRIDE) 71 | ) 72 | field_size = int(np.ceil(fpn_max_size / float(stride))) 73 | shifts = np.arange(0, field_size) * stride 74 | shift_x, shift_y = np.meshgrid(shifts, shifts) 75 | shift_x = shift_x.ravel() 76 | shift_y = shift_y.ravel() 77 | shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose() 78 | 79 | # Broacast anchors over shifts to enumerate all anchors at all positions 80 | # in the (H, W) grid: 81 | # - add A cell anchors of shape (1, A, 4) to 82 | # - K shifts of shape (K, 1, 4) to get 83 | # - all shifted anchors of shape (K, A, 4) 84 | # - reshape to (K*A, 4) shifted anchors 85 | A = num_cell_anchors 86 | K = shifts.shape[0] 87 | field_of_anchors = ( 88 | cell_anchors.reshape((1, A, 4)) + 89 | shifts.reshape((1, K, 
def unmap(data, count, inds, fill=0):
    """Scatter the rows of `data` back into an array of `count` rows.

    Rows listed in `inds` receive the corresponding row of `data`; every
    other row is set to `fill`. If nothing was subsampled the input is
    returned unchanged.
    """
    if count == len(inds):
        # Identity mapping: no subset was taken.
        return data

    out = np.full((count, ) + data.shape[1:], fill, dtype=data.dtype)
    if data.ndim == 1:
        out[inds] = data
    else:
        out[inds, :] = data
    return out
11 | """ 12 | # data blob: holds a batch of N images, each with 3 channels 13 | blob_names = ['data'] 14 | if cfg.RPN.RPN_ON: 15 | # RPN-only or end-to-end Faster R-CNN 16 | blob_names += roi_data.rpn.get_rpn_blob_names(is_training=is_training) 17 | elif cfg.RETINANET.RETINANET_ON: 18 | raise NotImplementedError 19 | else: 20 | # Fast R-CNN like models trained on precomputed proposals 21 | blob_names += roi_data.fast_rcnn.get_fast_rcnn_blob_names( 22 | is_training=is_training 23 | ) 24 | return blob_names 25 | 26 | 27 | def get_minibatch(roidb): 28 | """Given a roidb, construct a minibatch sampled from it.""" 29 | # We collect blobs from each image onto a list and then concat them into a 30 | # single tensor, hence we initialize each blob to an empty list 31 | blobs = {k: [] for k in get_minibatch_blob_names()} 32 | 33 | # Get the input image blob 34 | im_blob, im_scales = _get_image_blob(roidb) 35 | blobs['data'] = im_blob 36 | if cfg.RPN.RPN_ON: 37 | # RPN-only or end-to-end Faster/Mask R-CNN 38 | valid = roi_data.rpn.add_rpn_blobs(blobs, im_scales, roidb) 39 | elif cfg.RETINANET.RETINANET_ON: 40 | raise NotImplementedError 41 | else: 42 | # Fast R-CNN like models trained on precomputed proposals 43 | valid = roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb) 44 | return blobs, valid 45 | 46 | 47 | def _get_image_blob(roidb): 48 | """Builds an input blob from the images in the roidb at the specified 49 | scales. 
50 | """ 51 | num_images = len(roidb) 52 | # Sample random scales to use for each image in this batch 53 | scale_inds = np.random.randint( 54 | 0, high=len(cfg.TRAIN.SCALES), size=num_images) 55 | processed_ims = [] 56 | im_scales = [] 57 | for i in range(num_images): 58 | im = cv2.imread(roidb[i]['image']) 59 | assert im is not None, \ 60 | 'Failed to read image \'{}\''.format(roidb[i]['image']) 61 | # If NOT using opencv to read in images, uncomment following lines 62 | # if len(im.shape) == 2: 63 | # im = im[:, :, np.newaxis] 64 | # im = np.concatenate((im, im, im), axis=2) 65 | # # flip the channel, since the original one using cv2 66 | # # rgb -> bgr 67 | # im = im[:, :, ::-1] 68 | if roidb[i]['flipped']: 69 | im = im[:, ::-1, :] 70 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 71 | im, im_scale = blob_utils.prep_im_for_blob( 72 | im, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE) 73 | im_scales.append(im_scale[0]) 74 | processed_ims.append(im[0]) 75 | 76 | # Create a blob to hold the input images [n, c, h, w] 77 | blob = blob_utils.im_list_to_blob(processed_ims) 78 | 79 | return blob, im_scales 80 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from __future__ import print_function 9 | 10 | from Cython.Build import cythonize 11 | from Cython.Distutils import build_ext 12 | from setuptools import Extension 13 | from setuptools import setup 14 | 15 | import numpy as np 16 | 17 | 18 | # Obtain the numpy include directory. This logic works across numpy versions. 
19 | try: 20 | numpy_include = np.get_include() 21 | except AttributeError: 22 | numpy_include = np.get_numpy_include() 23 | 24 | 25 | ext_modules = [ 26 | Extension( 27 | name='utils.cython_bbox', 28 | sources=['utils/cython_bbox.pyx'], 29 | extra_compile_args=['-Wno-cpp'], 30 | include_dirs=[numpy_include] 31 | ), 32 | Extension( 33 | name='utils.cython_nms', 34 | sources=['utils/cython_nms.pyx'], 35 | extra_compile_args=['-Wno-cpp'], 36 | include_dirs=[numpy_include] 37 | ) 38 | ] 39 | 40 | setup( 41 | name='mask_rcnn', 42 | ext_modules=cythonize(ext_modules) 43 | ) 44 | 45 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/collections.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | ############################################################################## 15 | 16 | """A simple attribute dictionary used for representing configuration options.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | 24 | class AttrDict(dict): 25 | 26 | IMMUTABLE = '__immutable__' 27 | 28 | def __init__(self, *args, **kwargs): 29 | super(AttrDict, self).__init__(*args, **kwargs) 30 | self.__dict__[AttrDict.IMMUTABLE] = False 31 | 32 | def __getattr__(self, name): 33 | if name in self.__dict__: 34 | return self.__dict__[name] 35 | elif name in self: 36 | return self[name] 37 | else: 38 | raise AttributeError(name) 39 | 40 | def __setattr__(self, name, value): 41 | if not self.__dict__[AttrDict.IMMUTABLE]: 42 | if name in self.__dict__: 43 | self.__dict__[name] = value 44 | else: 45 | self[name] = value 46 | else: 47 | raise AttributeError( 48 | 'Attempted to set "{}" to "{}", but AttrDict is immutable'. 49 | format(name, value) 50 | ) 51 | 52 | def immutable(self, is_immutable): 53 | """Set immutability to is_immutable and recursively apply the setting 54 | to all nested AttrDicts. 55 | """ 56 | self.__dict__[AttrDict.IMMUTABLE] = is_immutable 57 | # Recursively set immutable state 58 | for v in self.__dict__.values(): 59 | if isinstance(v, AttrDict): 60 | v.immutable(is_immutable) 61 | for v in self.values(): 62 | if isinstance(v, AttrDict): 63 | v.immutable(is_immutable) 64 | 65 | def is_immutable(self): 66 | return self.__dict__[AttrDict.IMMUTABLE] 67 | -------------------------------------------------------------------------------- /lib/utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 
def colormap(rgb=False):
    """Return a fixed palette of 79 visually distinct colors.

    Args:
        rgb: if True, rows are (R, G, B); otherwise (B, G, R), which is
            OpenCV's channel order and the default for drawing with cv2.

    Returns:
        float32 ndarray of shape (79, 3) with channel values in [0, 255].
    """
    # Flat list of normalized RGB triples (one color per 3 values).
    color_list = np.array(
        [
            0.000, 0.447, 0.741,
            0.850, 0.325, 0.098,
            0.929, 0.694, 0.125,
            0.494, 0.184, 0.556,
            0.466, 0.674, 0.188,
            0.301, 0.745, 0.933,
            0.635, 0.078, 0.184,
            0.300, 0.300, 0.300,
            0.600, 0.600, 0.600,
            1.000, 0.000, 0.000,
            1.000, 0.500, 0.000,
            0.749, 0.749, 0.000,
            0.000, 1.000, 0.000,
            0.000, 0.000, 1.000,
            0.667, 0.000, 1.000,
            0.333, 0.333, 0.000,
            0.333, 0.667, 0.000,
            0.333, 1.000, 0.000,
            0.667, 0.333, 0.000,
            0.667, 0.667, 0.000,
            0.667, 1.000, 0.000,
            1.000, 0.333, 0.000,
            1.000, 0.667, 0.000,
            1.000, 1.000, 0.000,
            0.000, 0.333, 0.500,
            0.000, 0.667, 0.500,
            0.000, 1.000, 0.500,
            0.333, 0.000, 0.500,
            0.333, 0.333, 0.500,
            0.333, 0.667, 0.500,
            0.333, 1.000, 0.500,
            0.667, 0.000, 0.500,
            0.667, 0.333, 0.500,
            0.667, 0.667, 0.500,
            0.667, 1.000, 0.500,
            1.000, 0.000, 0.500,
            1.000, 0.333, 0.500,
            1.000, 0.667, 0.500,
            1.000, 1.000, 0.500,
            0.000, 0.333, 1.000,
            0.000, 0.667, 1.000,
            0.000, 1.000, 1.000,
            0.333, 0.000, 1.000,
            0.333, 0.333, 1.000,
            0.333, 0.667, 1.000,
            0.333, 1.000, 1.000,
            0.667, 0.000, 1.000,
            0.667, 0.333, 1.000,
            0.667, 0.667, 1.000,
            0.667, 1.000, 1.000,
            1.000, 0.000, 1.000,
            1.000, 0.333, 1.000,
            1.000, 0.667, 1.000,
            0.167, 0.000, 0.000,
            0.333, 0.000, 0.000,
            0.500, 0.000, 0.000,
            0.667, 0.000, 0.000,
            0.833, 0.000, 0.000,
            1.000, 0.000, 0.000,
            0.000, 0.167, 0.000,
            0.000, 0.333, 0.000,
            0.000, 0.500, 0.000,
            0.000, 0.667, 0.000,
            0.000, 0.833, 0.000,
            0.000, 1.000, 0.000,
            0.000, 0.000, 0.167,
            0.000, 0.000, 0.333,
            0.000, 0.000, 0.500,
            0.000, 0.000, 0.667,
            0.000, 0.000, 0.833,
            0.000, 0.000, 1.000,
            0.000, 0.000, 0.000,
            0.143, 0.143, 0.143,
            0.286, 0.286, 0.286,
            0.429, 0.429, 0.429,
            0.571, 0.571, 0.571,
            0.714, 0.714, 0.714,
            0.857, 0.857, 0.857,
            1.000, 1.000, 1.000
        ]
    ).astype(np.float32)
    # Reshape the flat list into (N, 3) rows and scale to [0, 255].
    color_list = color_list.reshape((-1, 3)) * 255
    if not rgb:
        # Reverse the channel axis: RGB -> BGR for use with cv2 drawing.
        color_list = color_list[:, ::-1]
    return color_list
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_overlaps( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ua = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) + 70 | box_area - iw * ih 71 | ) 72 | overlaps[n, k] = iw * ih / ua 73 | return overlaps 74 | -------------------------------------------------------------------------------- 
/lib/utils/detectron_weight_helper.py: -------------------------------------------------------------------------------- 1 | """Helper functions for loading pretrained weights from Detectron pickle files 2 | """ 3 | 4 | import pickle 5 | import re 6 | import torch 7 | 8 | 9 | def load_detectron_weight(net, detectron_weight_file): 10 | name_mapping, orphan_in_detectron = net.detectron_weight_mapping 11 | 12 | with open(detectron_weight_file, 'rb') as fp: 13 | src_blobs = pickle.load(fp, encoding='latin1') 14 | if 'blobs' in src_blobs: 15 | src_blobs = src_blobs['blobs'] 16 | 17 | params = net.state_dict() 18 | for p_name, p_tensor in params.items(): 19 | d_name = name_mapping[p_name] 20 | if isinstance(d_name, str): # maybe str, None or True 21 | p_tensor.copy_(torch.Tensor(src_blobs[d_name])) 22 | 23 | 24 | def resnet_weights_name_pattern(): 25 | pattern = re.compile(r"conv1_w|conv1_gn_[sb]|res_conv1_.+|res\d+_\d+_.+") 26 | return pattern 27 | 28 | 29 | if __name__ == '__main__': 30 | """Testing""" 31 | from pprint import pprint 32 | import sys 33 | sys.path.insert(0, '..') 34 | from modeling.model_builder import Generalized_RCNN 35 | from core.config import cfg, cfg_from_file 36 | 37 | cfg.MODEL.NUM_CLASSES = 81 38 | cfg_from_file('../../cfgs/res50_mask.yml') 39 | net = Generalized_RCNN() 40 | 41 | # pprint(list(net.state_dict().keys()), width=1) 42 | 43 | mapping, orphans = net.detectron_weight_mapping 44 | state_dict = net.state_dict() 45 | 46 | for k in mapping.keys(): 47 | assert k in state_dict, '%s' % k 48 | 49 | rest = set(state_dict.keys()) - set(mapping.keys()) 50 | assert len(rest) == 0 51 | -------------------------------------------------------------------------------- /lib/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Environment helper functions.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | import os 24 | import sys 25 | 26 | # Default value of the CMake install prefix 27 | _CMAKE_INSTALL_PREFIX = '/usr/local' 28 | 29 | 30 | def get_runtime_dir(): 31 | """Retrieve the path to the runtime directory.""" 32 | return os.getcwd() 33 | 34 | 35 | def get_py_bin_ext(): 36 | """Retrieve python binary extension.""" 37 | return '.py' 38 | 39 | 40 | def set_up_matplotlib(): 41 | """Set matplotlib up.""" 42 | import matplotlib 43 | # Use a non-interactive backend 44 | matplotlib.use('Agg') 45 | 46 | 47 | def exit_on_error(): 48 | """Exit from a detectron tool when there's an error.""" 49 | sys.exit(1) 50 | -------------------------------------------------------------------------------- /lib/utils/fpn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import utils.boxes as box_utils 4 | from core.config import cfg 5 | 6 | 7 | # ---------------------------------------------------------------------------- # 8 | # Helper functions for working with multilevel FPN RoIs 9 | # 
---------------------------------------------------------------------------- # 10 | 11 | def map_rois_to_fpn_levels(rois, k_min, k_max): 12 | """Determine which FPN level each RoI in a set of RoIs should map to based 13 | on the heuristic in the FPN paper. 14 | """ 15 | # Compute level ids 16 | areas, neg_idx = box_utils.boxes_area(rois) 17 | areas[neg_idx] = 0 # np.sqrt will remove the entries with negative value 18 | s = np.sqrt(areas) 19 | s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224 20 | lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4 21 | 22 | # Eqn.(1) in FPN paper 23 | target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6)) 24 | target_lvls = np.clip(target_lvls, k_min, k_max) 25 | 26 | # Mark to discard negative area roi. See utils.fpn.add_multilevel_roi_blobs 27 | # target_lvls[neg_idx] = -1 28 | return target_lvls 29 | 30 | 31 | def add_multilevel_roi_blobs( 32 | blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max 33 | ): 34 | """Add RoI blobs for multiple FPN levels to the blobs dict. 35 | 36 | blobs: a dict mapping from blob name to numpy ndarray 37 | blob_prefix: name prefix to use for the FPN blobs 38 | rois: the source rois as a 2D numpy array of shape (N, 5) where each row is 39 | an roi and the columns encode (batch_idx, x1, y1, x2, y2) 40 | target_lvls: numpy array of shape (N, ) indicating which FPN level each roi 41 | in rois should be assigned to. -1 means correspoind roi should be discarded. 
42 | lvl_min: the finest (highest resolution) FPN level (e.g., 2) 43 | lvl_max: the coarest (lowest resolution) FPN level (e.g., 6) 44 | """ 45 | rois_idx_order = np.empty((0, )) 46 | rois_stacked = np.zeros((0, 5), dtype=np.float32) # for assert 47 | # target_lvls = remove_negative_area_roi_blobs(blobs, blob_prefix, rois, target_lvls) 48 | for lvl in range(lvl_min, lvl_max + 1): 49 | idx_lvl = np.where(target_lvls == lvl)[0] 50 | blobs[blob_prefix + '_fpn' + str(lvl)] = rois[idx_lvl, :] 51 | rois_idx_order = np.concatenate((rois_idx_order, idx_lvl)) 52 | rois_stacked = np.vstack( 53 | [rois_stacked, blobs[blob_prefix + '_fpn' + str(lvl)]] 54 | ) 55 | rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, copy=False) 56 | blobs[blob_prefix + '_idx_restore_int32'] = rois_idx_restore 57 | # Sanity check that restore order is correct 58 | assert (rois_stacked[rois_idx_restore] == rois).all() 59 | 60 | 61 | def remove_negative_area_roi_blobs(blobs, blob_prefix, rois, target_lvls): 62 | """ Delete roi entries that have negative area (Uncompleted) """ 63 | idx_neg = np.where(target_lvls == -1)[0] 64 | rois = np.delete(rois, idx_neg, axis=0) 65 | blobs[blob_prefix] = rois 66 | target_lvls = np.delete(target_lvls, idx_neg, axis=0) 67 | #TODO: other blobs in faster_rcnn.get_fast_rcnn_blob_names should also be modified 68 | return target_lvls 69 | -------------------------------------------------------------------------------- /lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Image helper functions.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | import cv2 24 | import numpy as np 25 | 26 | 27 | def aspect_ratio_rel(im, aspect_ratio): 28 | """Performs width-relative aspect ratio transformation.""" 29 | im_h, im_w = im.shape[:2] 30 | im_ar_w = int(round(aspect_ratio * im_w)) 31 | im_ar = cv2.resize(im, dsize=(im_ar_w, im_h)) 32 | return im_ar 33 | 34 | 35 | def aspect_ratio_abs(im, aspect_ratio): 36 | """Performs absolute aspect ratio transformation.""" 37 | im_h, im_w = im.shape[:2] 38 | im_area = im_h * im_w 39 | 40 | im_ar_w = np.sqrt(im_area * aspect_ratio) 41 | im_ar_h = np.sqrt(im_area / aspect_ratio) 42 | assert np.isclose(im_ar_w / im_ar_h, aspect_ratio) 43 | 44 | im_ar = cv2.resize(im, dsize=(int(im_ar_w), int(im_ar_h))) 45 | return im_ar 46 | -------------------------------------------------------------------------------- /lib/utils/logging.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Utilities for logging.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | from collections import deque 24 | from email.mime.text import MIMEText 25 | import json 26 | import logging 27 | import numpy as np 28 | import smtplib 29 | import sys 30 | 31 | from core.config import cfg 32 | 33 | # Print lower precision floating point values than default FLOAT_REPR 34 | # Note! 
Has no use for json encode with C speedups 35 | json.encoder.FLOAT_REPR = lambda o: format(o, '.6f') 36 | 37 | 38 | def log_json_stats(stats, sort_keys=True): 39 | print('json_stats: {:s}'.format(json.dumps(stats, sort_keys=sort_keys))) 40 | 41 | 42 | def log_stats(stats, misc_args): 43 | """Log training statistics to terminal""" 44 | if hasattr(misc_args, 'epoch'): 45 | lines = "[%s][%s][Epoch %d][Iter %d / %d]\n" % ( 46 | misc_args.run_name, misc_args.cfg_filename, 47 | misc_args.epoch, misc_args.step, misc_args.iters_per_epoch) 48 | else: 49 | lines = "[%s][%s][Step %d / %d]\n" % ( 50 | misc_args.run_name, misc_args.cfg_filename, stats['iter'], cfg.SOLVER.MAX_ITER) 51 | 52 | lines += "\t\tloss: %.6f, lr: %.6f time: %.6f, eta: %s\n" % ( 53 | stats['loss'], stats['lr'], stats['time'], stats['eta'] 54 | ) 55 | if stats['metrics']: 56 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['metrics'].items()) + "\n" 57 | if stats['head_losses']: 58 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['head_losses'].items()) + "\n" 59 | if cfg.RPN.RPN_ON: 60 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['rpn_losses'].items()) + "\n" 61 | if cfg.FPN.FPN_ON: 62 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['rpn_fpn_cls_losses'].items()) + "\n" 63 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['rpn_fpn_bbox_losses'].items()) + "\n" 64 | print(lines[:-1]) # remove last new line 65 | 66 | 67 | class SmoothedValue(object): 68 | """Track a series of values and provide access to smoothed values over a 69 | window or the global series average. 
70 | """ 71 | 72 | def __init__(self, window_size): 73 | self.deque = deque(maxlen=window_size) 74 | self.series = [] 75 | self.total = 0.0 76 | self.count = 0 77 | 78 | def AddValue(self, value): 79 | self.deque.append(value) 80 | self.series.append(value) 81 | self.count += 1 82 | self.total += value 83 | 84 | def GetMedianValue(self): 85 | return np.median(self.deque) 86 | 87 | def GetAverageValue(self): 88 | return np.mean(self.deque) 89 | 90 | def GetGlobalAverageValue(self): 91 | return self.total / self.count 92 | 93 | 94 | def send_email(subject, body, to): 95 | s = smtplib.SMTP('localhost') 96 | mime = MIMEText(body) 97 | mime['Subject'] = subject 98 | mime['To'] = to 99 | s.sendmail('detectron', to, mime.as_string()) 100 | 101 | 102 | def setup_logging(name): 103 | FORMAT = '%(levelname)s %(filename)s:%(lineno)4d: %(message)s' 104 | # Manually clear root loggers to prevent any module that may have called 105 | # logging.basicConfig() from blocking our logging setup 106 | logging.root.handlers = [] 107 | logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout) 108 | logger = logging.getLogger(name) 109 | return logger 110 | -------------------------------------------------------------------------------- /lib/utils/resnet_weights_helper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for converting resnet pretrained weights from other formats 3 | """ 4 | import os 5 | import pickle 6 | 7 | import torch 8 | 9 | import nn as mynn 10 | import utils.detectron_weight_helper as dwh 11 | from core.config import cfg 12 | 13 | 14 | def load_pretrained_imagenet_weights(model): 15 | """Load pretrained weights 16 | Args: 17 | num_layers: 50 for res50 and so on. 
18 | model: the generalized rcnnn module 19 | """ 20 | _, ext = os.path.splitext(cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS) 21 | if ext == '.pkl': 22 | with open(cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS, 'rb') as fp: 23 | src_blobs = pickle.load(fp, encoding='latin1') 24 | if 'blobs' in src_blobs: 25 | src_blobs = src_blobs['blobs'] 26 | pretrianed_state_dict = src_blobs 27 | else: 28 | weights_file = os.path.join(cfg.ROOT_DIR, cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS) 29 | pretrianed_state_dict = convert_state_dict(torch.load(weights_file)) 30 | 31 | # Convert batchnorm weights 32 | for name, mod in model.named_modules(): 33 | if isinstance(mod, mynn.AffineChannel2d): 34 | if cfg.FPN.FPN_ON: 35 | pretrianed_name = name.split('.', 2)[-1] 36 | else: 37 | pretrianed_name = name.split('.', 1)[-1] 38 | bn_mean = pretrianed_state_dict[pretrianed_name + '.running_mean'] 39 | bn_var = pretrianed_state_dict[pretrianed_name + '.running_var'] 40 | scale = pretrianed_state_dict[pretrianed_name + '.weight'] 41 | bias = pretrianed_state_dict[pretrianed_name + '.bias'] 42 | std = torch.sqrt(bn_var + 1e-5) 43 | new_scale = scale / std 44 | new_bias = bias - bn_mean * scale / std 45 | pretrianed_state_dict[pretrianed_name + '.weight'] = new_scale 46 | pretrianed_state_dict[pretrianed_name + '.bias'] = new_bias 47 | 48 | model_state_dict = model.state_dict() 49 | 50 | pattern = dwh.resnet_weights_name_pattern() 51 | 52 | name_mapping, _ = model.detectron_weight_mapping 53 | 54 | for k, v in name_mapping.items(): 55 | if isinstance(v, str): # maybe a str, None or True 56 | if pattern.match(v): 57 | if cfg.FPN.FPN_ON: 58 | pretrianed_key = k.split('.', 2)[-1] 59 | else: 60 | pretrianed_key = k.split('.', 1)[-1] 61 | if ext == '.pkl': 62 | model_state_dict[k].copy_(torch.Tensor(pretrianed_state_dict[v])) 63 | else: 64 | model_state_dict[k].copy_(pretrianed_state_dict[pretrianed_key]) 65 | 66 | 67 | def convert_state_dict(src_dict): 68 | """Return the correct mapping of tensor name and 
value 69 | 70 | Mapping from the names of torchvision model to our resnet conv_body and box_head. 71 | """ 72 | dst_dict = {} 73 | for k, v in src_dict.items(): 74 | toks = k.split('.') 75 | if k.startswith('layer'): 76 | assert len(toks[0]) == 6 77 | res_id = int(toks[0][5]) + 1 78 | name = '.'.join(['res%d' % res_id] + toks[1:]) 79 | dst_dict[name] = v 80 | elif k.startswith('fc'): 81 | continue 82 | else: 83 | name = '.'.join(['res1'] + toks) 84 | dst_dict[name] = v 85 | return dst_dict 86 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import time 7 | 8 | 9 | class Timer(object): 10 | """A simple timer.""" 11 | 12 | def __init__(self): 13 | self.reset() 14 | 15 | def tic(self): 16 | # using time.time instead of time.clock because time time.clock 17 | # does not normalize for multithreading 18 | self.start_time = time.time() 19 | 20 | def toc(self, average=True): 21 | self.diff = time.time() - self.start_time 22 | self.total_time += self.diff 23 | self.calls += 1 24 | self.average_time = self.total_time / self.calls 25 | if average: 26 | return self.average_time 27 | else: 28 | return self.diff 29 | 30 | def reset(self): 31 | self.total_time = 0. 32 | self.calls = 0 33 | self.start_time = 0. 34 | self.diff = 0. 35 | self.average_time = 0. 36 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | """Add {PROJECT_ROOT}/lib. 
to PYTHONPATH 2 | 3 | Usage: 4 | import this module before import any modules under lib/ 5 | e.g 6 | import _init_paths 7 | from core.config import cfg 8 | """ 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | 14 | def add_path(path): 15 | if path not in sys.path: 16 | sys.path.insert(0, path) 17 | 18 | this_dir = osp.abspath(osp.dirname(osp.dirname(__file__))) 19 | 20 | # Add lib to PYTHONPATH 21 | lib_path = osp.join(this_dir, 'lib') 22 | add_path(lib_path) 23 | -------------------------------------------------------------------------------- /tools/download_imagenet_weights.py: -------------------------------------------------------------------------------- 1 | """Script to downlaod ImageNet pretrained weights from Google Drive 2 | 3 | Extra packages required to run the script: 4 | colorama, argparse_color_formatter 5 | """ 6 | 7 | import argparse 8 | import os 9 | import requests 10 | from argparse_color_formatter import ColorHelpFormatter 11 | from colorama import init, Fore 12 | 13 | import _init_paths # pylint: disable=unused-import 14 | from core.config import cfg 15 | 16 | 17 | def parse_args(): 18 | """Parser command line argumnets""" 19 | parser = argparse.ArgumentParser(formatter_class=ColorHelpFormatter) 20 | parser.add_argument('--output_dir', help='Directory to save downloaded weight files', 21 | default=os.path.join(cfg.DATA_DIR, 'pretrained_model')) 22 | parser.add_argument('-t', '--targets', nargs='+', metavar='file_name', 23 | help='Files to download. 
Allowed values are: ' + 24 | ', '.join(map(lambda s: Fore.YELLOW + s + Fore.RESET, 25 | list(PRETRAINED_WEIGHTS.keys()))), 26 | choices=list(PRETRAINED_WEIGHTS.keys()), 27 | default=list(PRETRAINED_WEIGHTS.keys())) 28 | return parser.parse_args() 29 | 30 | 31 | # ---------------------------------------------------------------------------- # 32 | # Mapping from filename to google drive file_id 33 | # ---------------------------------------------------------------------------- # 34 | PRETRAINED_WEIGHTS = { 35 | 'resnet50_caffe.pth': '1wHSvusQ1CiEMc5Nx5R8adqoHQjIDWXl1', 36 | 'resnet101_caffe.pth': '1x2fTMqLrn63EMW0VuK4GEa2eQKzvJ_7l', 37 | 'resnet152_caffe.pth': '1NSCycOb7pU0KzluH326zmyMFUU55JslF', 38 | 'vgg16_caffe.pth': '19UphT53C0Ua9JAtICnw84PPTa3sZZ_9k', 39 | } 40 | 41 | 42 | # ---------------------------------------------------------------------------- # 43 | # Helper fucntions for download file from google drive 44 | # ---------------------------------------------------------------------------- # 45 | 46 | def download_file_from_google_drive(id, destination): 47 | URL = "https://docs.google.com/uc?export=download" 48 | 49 | session = requests.Session() 50 | 51 | response = session.get(URL, params={'id': id}, stream=True) 52 | token = get_confirm_token(response) 53 | 54 | if token: 55 | params = {'id': id, 'confirm': token} 56 | response = session.get(URL, params=params, stream=True) 57 | 58 | save_response_content(response, destination) 59 | 60 | 61 | def get_confirm_token(response): 62 | for key, value in response.cookies.items(): 63 | if key.startswith('download_warning'): 64 | return value 65 | 66 | return None 67 | 68 | 69 | def save_response_content(response, destination): 70 | CHUNK_SIZE = 32768 71 | 72 | with open(destination, "wb") as f: 73 | for chunk in response.iter_content(CHUNK_SIZE): 74 | if chunk: # filter out keep-alive new chunks 75 | f.write(chunk) 76 | 77 | 78 | def main(): 79 | init() # colorama init. 
Only has effect on Windows 80 | args = parse_args() 81 | for filename in args.targets: 82 | file_id = PRETRAINED_WEIGHTS[filename] 83 | if not os.path.exists(args.output_dir): 84 | os.makedirs(args.output_dir) 85 | destination = os.path.join(args.output_dir, filename) 86 | download_file_from_google_drive(file_id, destination) 87 | print('Download {} to {}'.format(filename, destination)) 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | """Perform inference on one or more datasets.""" 2 | 3 | import argparse 4 | import cv2 5 | import os 6 | import pprint 7 | import sys 8 | import time 9 | 10 | import torch 11 | 12 | import _init_paths # pylint: disable=unused-import 13 | from core.config import cfg, merge_cfg_from_file, merge_cfg_from_list, assert_and_infer_cfg 14 | from core.test_engine import run_inference 15 | import utils.logging 16 | 17 | # OpenCL may be enabled by default in OpenCV3; disable it because it's not 18 | # thread safe and causes unwanted GPU memory allocations. 19 | cv2.ocl.setUseOpenCL(False) 20 | 21 | 22 | def parse_args(): 23 | """Parse in command line arguments""" 24 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 25 | parser.add_argument( 26 | '--dataset', 27 | help='training dataset') 28 | parser.add_argument( 29 | '--cfg', dest='cfg_file', required=True, 30 | help='optional config file') 31 | 32 | parser.add_argument( 33 | '--load_ckpt', help='path of checkpoint to load') 34 | parser.add_argument( 35 | '--load_detectron', help='path to the detectron weight pickle file') 36 | 37 | parser.add_argument( 38 | '--output_dir', 39 | help='output directory to save the testing results. 
if __name__ == '__main__':

    # Inference requires a GPU; bail out early otherwise.
    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    logger = utils.logging.setup_logging(__name__)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)

    # Exactly one of: single visible GPU, or --multi-gpu-testing.
    assert (torch.cuda.device_count() == 1) ^ bool(args.multi_gpu_testing)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    if args.output_dir is None:
        # Default output directory: <checkpoint_dir>/../test
        ckpt_path = args.load_ckpt if args.load_ckpt else args.load_detectron
        args.output_dir = os.path.join(
            os.path.dirname(os.path.dirname(ckpt_path)), 'test')
        logger.info('Automatically set output directory to %s', args.output_dir)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    cfg.VIS = args.vis

    # Apply the config file first, then --set overrides on top of it.
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        merge_cfg_from_list(args.set_cfgs)

    if args.dataset == "coco2017":
        cfg.TEST.DATASETS = ('coco_2017_val',)
        cfg.MODEL.NUM_CLASSES = 81
    elif args.dataset == "keypoints_coco2017":
        cfg.TEST.DATASETS = ('keypoints_coco_2017_val',)
        cfg.MODEL.NUM_CLASSES = 2
    else:  # For subprocess call
        assert cfg.TEST.DATASETS, 'cfg.TEST.DATASETS shouldn\'t be empty'
    assert_and_infer_cfg()

    logger.info('Testing with config:')
    logger.info(pprint.pformat(cfg))

    # For test_engine.multi_gpu_test_net_on_dataset
    args.test_net_file, _ = os.path.splitext(__file__)
    # manually set args.cuda
    args.cuda = True

    run_inference(
        args,
        ind_range=args.range,
        multi_gpu_testing=args.multi_gpu_testing,
        check_expected_results=True)