├── .github └── issue_template.md ├── .gitignore ├── .pylintrc ├── .travis.yml ├── .vscode └── settings.json ├── BENCHMARK.md ├── LICENSE ├── README.md ├── configs ├── baselines │ ├── e2e_faster_rcnn_R-101-FPN_1x.yaml │ ├── e2e_faster_rcnn_R-101-FPN_2x.yaml │ ├── e2e_faster_rcnn_R-50-C4_1x.yaml │ ├── e2e_faster_rcnn_R-50-C4_2x.yaml │ ├── e2e_faster_rcnn_R-50-FPN_1x.yaml │ ├── e2e_faster_rcnn_R-50-FPN_2x.yaml │ ├── e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml │ ├── e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml │ ├── e2e_keypoint_rcnn_R-101-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_R-101-FPN_s1x.yaml │ ├── e2e_keypoint_rcnn_R-50-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_R-50-FPN_s1x.yaml │ ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml │ ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml │ ├── e2e_mask_rcnn_R-101-C4_2x.yaml │ ├── e2e_mask_rcnn_R-101-FPN_1x.yaml │ ├── e2e_mask_rcnn_R-101-FPN_2x.yaml │ ├── e2e_mask_rcnn_R-50-C4_1x.yaml │ ├── e2e_mask_rcnn_R-50-C4_2x.yaml │ ├── e2e_mask_rcnn_R-50-FPN_1x.yaml │ ├── e2e_mask_rcnn_R-50-FPN_2x.yaml │ ├── e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml │ ├── e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml │ └── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml ├── getting_started │ ├── tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml │ ├── tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml │ ├── tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml │ └── tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml └── gn_baselines │ ├── e2e_mask_rcnn_R-101-FPN_2x_gn.yaml │ ├── e2e_mask_rcnn_R-101-FPN_3x_gn.yaml │ ├── e2e_mask_rcnn_R-50-FPN_2x_gn.yaml │ ├── e2e_mask_rcnn_R-50-FPN_3x_gn.yaml │ ├── scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml │ └── scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml ├── demo ├── 
33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg ├── 33823288584_1d21cf0a26_k-detectron-R50-C4.jpg ├── 33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg ├── 33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg ├── 33823288584_1d21cf0a26_k.jpg ├── convert_pdf2img.sh ├── e2e_mask_rcnn_R-50-C4 │ └── train_from_scratch_epoch1_bs4 │ │ ├── img1.jpg │ │ ├── img2.jpg │ │ ├── img3.jpg │ │ └── img4.jpg ├── img1_keypoints-detectron-R50-FPN.jpg ├── img1_keypoints-pydetectron-R50-FPN.jpg ├── img2_keypoints-detectron-R50-FPN.jpg ├── img2_keypoints-pydetectron-R50-FPN.jpg ├── loss_cmp_of_e2e_faster_rcnn_R-50-FPN_1x.jpg ├── loss_cmp_of_e2e_keypoint_rcnn_R-50-FPN_1x.jpg ├── loss_cmp_of_e2e_mask_rcnn_R-50-FPN_1x.jpg ├── loss_e2e_keypoint_rcnn_R-50-FPN_1x_bs8.jpg ├── loss_e2e_mask_rcnn_R-50-FPN_1x_bs16.jpg ├── loss_e2e_mask_rcnn_R-50-FPN_1x_bs6.jpg ├── sample_images │ ├── img1.jpg │ ├── img2.jpg │ ├── img3.jpg │ └── img4.jpg └── sample_images_keypoints │ ├── img1_keypoints.jpg │ └── img2_keypoints.jpg ├── lib ├── core │ ├── __init__.py │ ├── config.py │ ├── test.py │ └── test_engine.py ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── cityscapes │ │ ├── __init__.py │ │ ├── coco_to_cityscapes_id.py │ │ └── tools │ │ │ ├── convert_cityscapes_to_coco.py │ │ │ └── convert_coco_model_to_cityscapes.py │ ├── cityscapes_json_dataset_evaluator.py │ ├── dataset_catalog.py │ ├── dummy_datasets.py │ ├── json_dataset.py │ ├── json_dataset_evaluator.py │ ├── roidb.py │ ├── task_evaluation.py │ ├── voc_dataset_evaluator.py │ └── voc_eval.py ├── make.sh ├── modeling │ ├── FPN.py │ ├── ResNet.py │ ├── __init__.py │ ├── collect_and_distribute_fpn_rpn_proposals.py │ ├── fast_rcnn_heads.py │ ├── generate_anchors.py │ ├── generate_proposal_labels.py │ ├── generate_proposals.py │ ├── keypoint_rcnn_heads.py │ ├── mask_rcnn_heads.py │ ├── model_builder.py │ └── rpn_heads.py ├── nn │ ├── __init__.py │ ├── functional.py │ ├── init.py │ ├── 
modules │ │ ├── __init__.py │ │ ├── affine.py │ │ ├── normalization.py │ │ └── upsample.py │ └── parallel │ │ ├── __init__.py │ │ ├── _functions.py │ │ ├── data_parallel.py │ │ ├── parallel_apply.py │ │ ├── replicate.py │ │ └── scatter_gather.py ├── roi_data │ ├── __init__.py │ ├── data_utils.py │ ├── fast_rcnn.py │ ├── keypoint_rcnn.py │ ├── loader.py │ ├── mask_rcnn.py │ ├── minibatch.py │ └── rpn.py ├── setup.py └── utils │ ├── __init__.py │ ├── blob.py │ ├── boxes.py │ ├── collections.py │ ├── colormap.py │ ├── cython_bbox.c │ ├── cython_bbox.pyx │ ├── cython_nms.c │ ├── cython_nms.pyx │ ├── detectron_weight_helper.py │ ├── env.py │ ├── fpn.py │ ├── image.py │ ├── io.py │ ├── keypoints.py │ ├── logging.py │ ├── misc.py │ ├── net.py │ ├── net_utils.py │ ├── resnet_weights_helper.py │ ├── segms.py │ ├── subprocess.py │ ├── timer.py │ ├── training_stats.py │ └── vis.py └── tools ├── _init_paths.py ├── download_imagenet_weights.py ├── infer_simple.py ├── pascal_voc_xml2coco_json_converter.py ├── test_net.py ├── train_net.py └── train_net_step.py /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | ## PLEASE FOLLOW THESE INSTRUCTIONS BEFORE POSTING 2 | 1. **Read the README.md thoroughly ! README.md is not a decoration.** 3 | 2. Please search existing *open and closed* issues in case your issue has already been reported 4 | 3. Please try to debug the issue in case you can solve it on your own before posting 5 | 6 | ## After following steps above and agreeing to provide the detailed information requested below, you may continue with posting your issue 7 | (**Delete this line and the text above it.**) 8 | 9 | ### Expected results 10 | 11 | What did you expect to see? 12 | 13 | ### Actual results 14 | 15 | What did you observe instead? 16 | 17 | ### Detailed steps to reproduce 18 | 19 | E.g.: 20 | 21 | ``` 22 | The command that you ran 23 | ``` 24 | 25 | ### System information 26 | 27 | * Operating system: ? 
28 | * CUDA version: ? 29 | * cuDNN version: ? 30 | * GPU models (for all devices if they are not all the same): ? 31 | * python version: ? 32 | * pytorch version: ? 33 | * Anything else that seems relevant: ? 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | *.pyc 3 | *~ 4 | 5 | *.o 6 | *.so 7 | 8 | .ipynb_checkpoints 9 | notebooks/*.pkl 10 | 11 | /Outputs 12 | lib/build 13 | lib/detectron_pytorch.egg-info 14 | 15 | # ------------------------------ 16 | 17 | .vscode/* 18 | !.vscode/settings.json 19 | !.vscode/tasks.json 20 | !.vscode/launch.json 21 | !.vscode/extensions.json 22 | 23 | # General 24 | .DS_Store 25 | .AppleDouble 26 | .LSOverride 27 | 28 | # Icon must end with two \r 29 | Icon 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear in the root of a volume 35 | .DocumentRevisions-V100 36 | .fseventsd 37 | .Spotlight-V100 38 | .TemporaryItems 39 | .Trashes 40 | .VolumeIcon.icns 41 | .com.apple.timemachine.donotpresent 42 | 43 | # Directories potentially created on remote AFP share 44 | .AppleDB 45 | .AppleDesktop 46 | Network Trash Folder 47 | Temporary Items 48 | .apdisk 49 | 50 | *~ 51 | 52 | # temporary files which can be created if a process still has a handle open of a deleted file 53 | .fuse_hidden* 54 | 55 | # KDE directory preferences 56 | .directory 57 | 58 | # Linux trash folder which might appear on any partition or disk 59 | .Trash-* 60 | 61 | # .nfs files are created when an open file is removed but is still being accessed 62 | .nfs* 63 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | extension-pkg-whitelist=numpy,torch,cv2 3 | init-hook="sys.path.insert(0, './tools'); import _init_paths" 4 | 5 | [MESSAGES CONTROL] 6 | 
disable=wrong-import-position 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | group: travis_latest 2 | language: python 3 | cache: pip 4 | python: 5 | - 3.6 6 | #- nightly 7 | #- pypy3 8 | matrix: 9 | allow_failures: 10 | - python: nightly 11 | - python: pypy3 12 | install: 13 | #- pip install -r requirements.txt 14 | - pip install flake8 # pytest # add another testing frameworks later 15 | before_script: 16 | # stop the build if there are Python syntax errors or undefined names 17 | - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics 18 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 19 | - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 20 | script: 21 | - true # pytest --capture=sys # add other tests here 22 | notifications: 23 | on_success: change 24 | on_failure: change # `always` will be the setting once code changes slow down 25 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintEnabled": true, 3 | "python.linting.flake8Enabled": false, 4 | "python.autoComplete.extraPaths": ["${workspaceRoot}/lib"], 5 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Roy Tseng 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the 
Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | 36 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | RPN: 17 | SIZES: (32, 64, 128, 256, 512) 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | TRAIN: 22 | SCALES: (800,) 23 | MAX_SIZE: 1333 24 | IMS_PER_BATCH: 1 25 | BATCH_SIZE_PER_IM: 512 26 | TEST: 27 | SCALE: 800 28 | MAX_SIZE: 1333 29 | NMS: 0.5 30 | RPN_PRE_NMS_TOP_N: 6000 31 | RPN_POST_NMS_TOP_N: 1000 32 | 
-------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 14 | MAX_ITER: 360000 15 | STEPS: [0, 240000, 320000] 16 | RPN: 17 | SIZES: (32, 64, 128, 256, 512) 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | TRAIN: 22 | SCALES: (800,) 23 | MAX_SIZE: 1333 24 | IMS_PER_BATCH: 1 25 | BATCH_SIZE_PER_IM: 512 26 | TEST: 27 | SCALE: 800 28 | MAX_SIZE: 1333 29 | NMS: 0.5 30 | RPN_PRE_NMS_TOP_N: 6000 31 | RPN_POST_NMS_TOP_N: 1000 32 | 33 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | 
NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: 
bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 360000 13 | STEPS: [0, 240000, 320000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 360000 13 | STEPS: [0, 240000, 320000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: 
bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 
| NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 130000 15 | STEPS: [0, 100000, 120000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | 
KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 130000 15 | STEPS: [0, 100000, 120000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: 
fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | 
USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 130000 13 | STEPS: [0, 100000, 120000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | 
ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | 
NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 130000 13 | STEPS: [0, 100000, 120000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-101-C4_2x.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet101_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 360000 16 | STEPS: [0, 240000, 320000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | RESNETS: 8 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 9 | NUM_GPUS: 8 10 | SOLVER: 11 | WEIGHT_DECAY: 0.0001 12 | LR_POLICY: steps_with_decay 13 | BASE_LR: 0.02 14 | GAMMA: 0.1 15 | MAX_ITER: 90000 16 | STEPS: [0, 60000, 80000] 17 | FPN: 18 | FPN_ON: True 19 | MULTILEVEL_ROIS: True 20 | MULTILEVEL_RPN: True 21 | FAST_RCNN: 22 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 23 | ROI_XFORM_METHOD: RoIAlign 24 | ROI_XFORM_RESOLUTION: 7 25 | ROI_XFORM_SAMPLING_RATIO: 2 26 | MRCNN: 27 | 
ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 28 | RESOLUTION: 28 # (output mask resolution) default 14 29 | ROI_XFORM_METHOD: RoIAlign 30 | ROI_XFORM_RESOLUTION: 14 # default 7 31 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 32 | DILATION: 1 # default 2 33 | CONV_INIT: MSRAFill # default GaussianFill 34 | TRAIN: 35 | SCALES: (800,) 36 | MAX_SIZE: 1333 37 | BATCH_SIZE_PER_IM: 512 38 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 39 | TEST: 40 | SCALE: 800 41 | MAX_SIZE: 1333 42 | NMS: 0.5 43 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 44 | RPN_POST_NMS_TOP_N: 1000 45 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | 
RPN_POST_NMS_TOP_N: 1000 44 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 180000 16 | STEPS: [0, 120000, 160000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 360000 16 | STEPS: [0, 240000, 320000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | 
ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | 
-------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 
160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 32 24 | WIDTH_PER_GROUP: 8 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 32 24 | 
WIDTH_PER_GROUP: 8 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 64 24 | WIDTH_PER_GROUP: 4 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | 
ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 64 24 | WIDTH_PER_GROUP: 4 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 
# Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet152_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | # 1.44x schedule (note TRAIN.IMS_PER_BATCH: 1) 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | MAX_ITER: 260000 15 | STEPS: [0, 200000, 240000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | RESNETS: 21 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-152-32x8d-IN5k.pkl' 22 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 23 | TRANS_FUNC: bottleneck_transformation 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (640, 672, 704, 736, 768, 800) # Scale jitter 41 | MAX_SIZE: 1333 42 | IMS_PER_BATCH: 1 43 | BATCH_SIZE_PER_IM: 512 44 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 45 | TEST: 46 | SCALE: 800 47 | MAX_SIZE: 1333 48 | NMS: 0.5 49 | BBOX_VOTE: 50 | ENABLED: True 51 | VOTE_TH: 0.9 52 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 53 | RPN_POST_NMS_TOP_N: 1000 54 | BBOX_AUG: 55 | ENABLED: True 56 | 
SCORE_HEUR: UNION 57 | COORD_HEUR: UNION 58 | H_FLIP: True 59 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 60 | MAX_SIZE: 2000 61 | SCALE_H_FLIP: True 62 | SCALE_SIZE_DEP: False 63 | ASPECT_RATIOS: () 64 | ASPECT_RATIO_H_FLIP: False 65 | MASK_AUG: 66 | ENABLED: True 67 | HEUR: SOFT_AVG 68 | H_FLIP: True 69 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 70 | MAX_SIZE: 2000 71 | SCALE_H_FLIP: True 72 | SCALE_SIZE_DEP: False 73 | ASPECT_RATIOS: () 74 | ASPECT_RATIO_H_FLIP: False 75 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 1 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.0025 12 | GAMMA: 0.1 13 | MAX_ITER: 60000 14 | STEPS: [0, 30000, 40000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 2 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.005 12 | GAMMA: 0.1 13 | MAX_ITER: 30000 14 | STEPS: [0, 15000, 20000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | 53 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 4 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 15000 14 | STEPS: [0, 7500, 10000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 7500 14 | STEPS: [0, 3750, 5000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-101-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this 
is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 270000 13 | STEPS: [0, 210000, 250000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-101-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | 
MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-50-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 
28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 270000 13 | STEPS: [0, 210000, 250000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-50-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | 
ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | LOAD_IMAGENET_PRETRAINED_WEIGHTS: False 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 270000 14 | STEPS: [0, 210000, 250000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | USE_GN: True # Note: use GN on the FPN-specific layers 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | 
SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | LOAD_IMAGENET_PRETRAINED_WEIGHTS: False 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 270000 14 | STEPS: [0, 210000, 250000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | USE_GN: True # Note: use GN on the FPN-specific layers 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | 
MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-detectron-R50-C4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/33823288584_1d21cf0a26_k-detectron-R50-C4.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/33823288584_1d21cf0a26_k.jpg 
#!/bin/bash
# Convert every img*.pdf in a directory (given with -d DIR) to a JPEG of the
# same basename, using ImageMagick's `convert`.

pdfdir=''

while getopts 'd:' flag; do
  case "$flag" in
    d) pdfdir=$OPTARG ;;
  esac
done

# Use a glob instead of parsing `ls` output: $(ls ...) word-splits on
# whitespace (breaks filenames with spaces) and globs its result a second
# time (shellcheck SC2045). Quote all expansions for the same reason.
for pdf in "${pdfdir}"/img*.pdf; do
  # If the glob matched nothing it expands to itself; skip that literal.
  [ -e "$pdf" ] || continue
  fname="${pdf%.*}"
  convert -density 300x300 -quality 95 "$pdf" "${fname}.jpg"
done
https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img4.jpg -------------------------------------------------------------------------------- /demo/img1_keypoints-detectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/img1_keypoints-detectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img1_keypoints-pydetectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/img1_keypoints-pydetectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img2_keypoints-detectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/img2_keypoints-detectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img2_keypoints-pydetectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/img2_keypoints-pydetectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/loss_cmp_of_e2e_faster_rcnn_R-50-FPN_1x.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/loss_cmp_of_e2e_faster_rcnn_R-50-FPN_1x.jpg 
-------------------------------------------------------------------------------- /demo/loss_cmp_of_e2e_keypoint_rcnn_R-50-FPN_1x.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/loss_cmp_of_e2e_keypoint_rcnn_R-50-FPN_1x.jpg -------------------------------------------------------------------------------- /demo/loss_cmp_of_e2e_mask_rcnn_R-50-FPN_1x.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/loss_cmp_of_e2e_mask_rcnn_R-50-FPN_1x.jpg -------------------------------------------------------------------------------- /demo/loss_e2e_keypoint_rcnn_R-50-FPN_1x_bs8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/loss_e2e_keypoint_rcnn_R-50-FPN_1x_bs8.jpg -------------------------------------------------------------------------------- /demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs16.jpg -------------------------------------------------------------------------------- /demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs6.jpg -------------------------------------------------------------------------------- /demo/sample_images/img1.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/sample_images/img1.jpg -------------------------------------------------------------------------------- /demo/sample_images/img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/sample_images/img2.jpg -------------------------------------------------------------------------------- /demo/sample_images/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/sample_images/img3.jpg -------------------------------------------------------------------------------- /demo/sample_images/img4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/sample_images/img4.jpg -------------------------------------------------------------------------------- /demo/sample_images_keypoints/img1_keypoints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/sample_images_keypoints/img1_keypoints.jpg -------------------------------------------------------------------------------- /demo/sample_images_keypoints/img2_keypoints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/sample_images_keypoints/img2_keypoints.jpg -------------------------------------------------------------------------------- 
function VOCopts = get_voc_opts(path)
% GET_VOC_OPTS  Load the PASCAL VOC devkit options struct.
%   Temporarily cd's into the devkit directory `path`, runs VOCinit from its
%   VOCcode/ subdirectory, then restores the original working directory and
%   MATLAB path whether or not initialization succeeds.

tmp = pwd;
cd(path);
try
  addpath('VOCcode');
  VOCinit;
catch
  % Undo path/cwd changes before surfacing the error.
  rmpath('VOCcode');
  cd(tmp);
  error(sprintf('VOCcode directory not found under %s', path));
end
rmpath('VOCcode');
cd(tmp);

function res = voc_eval(path, comp_id, test_set, output_dir)
% VOC_EVAL  Evaluate detection results for every VOC class.
%   Returns a struct array `res` with per-class recall, precision, ap and
%   ap_auc, and prints a per-class / mean AP summary.

VOCopts = get_voc_opts(path);
VOCopts.testset = test_set;

% Evaluate each class independently.
for i = 1:length(VOCopts.classes)
  cls = VOCopts.classes{i};
  res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir);
end

fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
fprintf('Results:\n');
aps = [res(:).ap]';
fprintf('%.1f\n', aps * 100);
fprintf('%.1f\n', mean(aps) * 100);
fprintf('~~~~~~~~~~~~~~~~~~~~\n');

function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
% Evaluate a single class: run VOCevaldet, compute AUC-style AP via xVOCap,
% save a PR curve image and a .mat file with the raw numbers.

test_set = VOCopts.testset;
year = VOCopts.dataset(4:end);

addpath(fullfile(VOCopts.datadir, 'VOCcode'));

% NOTE(review): res_fn is computed but never used below — confirm whether it
% can be removed or was meant to be checked for existence.
res_fn = sprintf(VOCopts.detrespath, comp_id, cls);

recall = [];
prec = [];
ap = 0;
ap_auc = 0;

% VOC <= 2007 test annotations are public; later test sets can only be
% scored remotely, so skip local evaluation for them.
do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
if do_eval
  % Bug in VOCevaldet requires that tic has been called first
  tic;
  [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
  ap_auc = xVOCap(recall, prec);

  % force plot limits
  ylim([0 1]);
  xlim([0 1]);

  print(gcf, '-djpeg', '-r0', ...
      [output_dir '/' cls '_pr.jpg']);
end
fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);

res.recall = recall;
res.prec = prec;
res.ap = ap;
res.ap_auc = ap_auc;

save([output_dir '/' cls '_pr.mat'], ...
    'res', 'recall', 'prec', 'ap', 'ap_auc');

rmpath(fullfile(VOCopts.datadir, 'VOCcode'));

function ap = xVOCap(rec,prec)
% From the PASCAL VOC 2011 devkit
% Computes average precision as the area under the monotonically
% non-increasing precision envelope (the VOC >= 2010 definition).

mrec=[0 ; rec ; 1];
mpre=[0 ; prec ; 0];
% Make precision monotonically decreasing from right to left.
for i=numel(mpre)-1:-1:1
    mpre(i)=max(mpre(i),mpre(i+1));
end
% Sum rectangle areas where recall actually changes.
i=find(mrec(2:end)~=mrec(1:end-1))+1;
ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
# Cityscapes ids: 0 background, 1 bicycle, 2 car, 3 person, 4 train,
# 5 truck, 6 motorcycle, 7 bus, 8 rider.
# COCO ids:       1 person, 2 bicycle, 3 car, 4 motorcycle, 6 bus,
#                 7 train, 8 truck.
# In every table below, -1 means "no COCO counterpart: randomly initialize
# the corresponding weights".


def cityscapes_to_coco(cityscapes_id):
    """Default Cityscapes -> COCO class-id mapping (rider is rand init)."""
    coco_ids = (0, 2, 3, 1, 7, 8, 4, 6, -1)
    return dict(enumerate(coco_ids))[cityscapes_id]


def cityscapes_to_coco_with_rider(cityscapes_id):
    """Like cityscapes_to_coco, but rider borrows COCO's 'person' weights."""
    coco_ids = (0, 2, 3, 1, 7, 8, 4, 6, 1)
    return dict(enumerate(coco_ids))[cityscapes_id]


def cityscapes_to_coco_without_person_rider(cityscapes_id):
    """Mapping that rand-inits both person and rider."""
    coco_ids = (0, 2, 3, -1, 7, 8, 4, 6, -1)
    return dict(enumerate(coco_ids))[cityscapes_id]


def cityscapes_to_coco_all_random(cityscapes_id):
    """Mapping that rand-inits every class, background included."""
    coco_ids = (-1,) * 9
    return dict(enumerate(coco_ids))[cityscapes_id]
def parse_args():
    """Parse CLI arguments; print help and exit if invoked with no args."""
    parser = argparse.ArgumentParser(
        description='Convert a COCO pre-trained model for use with Cityscapes')
    parser.add_argument(
        '--coco_model', dest='coco_model_file_name',
        help='Pretrained network weights file path',
        default=None, type=str)
    parser.add_argument(
        '--convert_func', dest='convert_func',
        help='Blob conversion function',
        default='cityscapes_to_coco', type=str)
    parser.add_argument(
        '--output', dest='out_file_name',
        help='Output file path',
        default=None, type=str)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args


def convert_coco_blobs_to_cityscape_blobs(model_dict, convert_func=None):
    """Convert, in place, every class-sized blob in model_dict['blobs'].

    Args:
        model_dict: dict with a 'blobs' mapping of name -> ndarray.
        convert_func: name of the mapping function in the `cs` module.
            Defaults to the parsed CLI args for backward compatibility
            (the original implementation read the module-level `args`
            global directly, which made the function untestable in
            isolation).
    """
    if convert_func is None:
        convert_func = args.convert_func  # preserve legacy global coupling
    # Replacing values while iterating items() is safe; only key
    # insertion/deletion during iteration is not.
    for k, v in model_dict['blobs'].items():
        if v.shape[0] == NUM_COCO_CLS or v.shape[0] == 4 * NUM_COCO_CLS:
            coco_blob = model_dict['blobs'][k]
            print(
                'Converting COCO blob {} with shape {}'.
                format(k, coco_blob.shape)
            )
            cs_blob = convert_coco_blob_to_cityscapes_blob(
                coco_blob, convert_func
            )
            print(' -> converted shape {}'.format(cs_blob.shape))
            model_dict['blobs'][k] = cs_blob


def convert_coco_blob_to_cityscapes_blob(coco_blob, convert_func):
    """Return a Cityscapes-shaped copy of a COCO class-indexed blob.

    coco_blob has leading dim 81 (cls scores) or 81*4 (bbox regression).
    Rows with a Cityscapes->COCO correspondence are copied from the COCO
    weights; the rest are re-initialized from a Gaussian matched to the
    blob's mean/std.
    """
    # coco blob (81, ...) or (81*4, ...)
    coco_shape = coco_blob.shape
    leading_factor = int(coco_shape[0] / NUM_COCO_CLS)
    tail_shape = list(coco_shape[1:])
    assert leading_factor == 1 or leading_factor == 4

    # Reshape in [num_classes, ...] form for easier manipulations
    coco_blob = coco_blob.reshape([NUM_COCO_CLS, -1] + tail_shape)
    # Default initialization uses Gaussian with mean and std to match the
    # existing parameters
    std = coco_blob.std()
    mean = coco_blob.mean()
    cs_shape = [NUM_CS_CLS] + list(coco_blob.shape[1:])
    cs_blob = (np.random.randn(*cs_shape) * std + mean).astype(np.float32)

    # Replace random parameters with COCO parameters if class mapping exists
    for i in range(NUM_CS_CLS):
        coco_cls_id = getattr(cs, convert_func)(i)
        if coco_cls_id >= 0:  # otherwise ignore (rand init)
            cs_blob[i] = coco_blob[coco_cls_id]

    cs_shape = [NUM_CS_CLS * leading_factor] + tail_shape
    return cs_blob.reshape(cs_shape)


def remove_momentum(model_dict):
    """Drop optimizer momentum blobs (they are useless for fine-tuning).

    Iterate over a snapshot of the keys: deleting entries while iterating
    the live .keys() view raises RuntimeError on Python 3.
    """
    for k in list(model_dict['blobs'].keys()):
        if k.endswith('_momentum'):
            del model_dict['blobs'][k]


def load_and_convert_coco_model(args):
    """Load a pickled COCO model, strip momentum, and remap its class blobs."""
    # Pickle payloads are binary: text mode ('r') breaks under Python 3.
    with open(args.coco_model_file_name, 'rb') as f:
        model_dict = pickle.load(f)
    remove_momentum(model_dict)
    convert_coco_blobs_to_cityscape_blobs(model_dict, args.convert_func)
    return model_dict


if __name__ == '__main__':
    args = parse_args()
    print(args)
    assert os.path.exists(args.coco_model_file_name), \
        'Weights file does not exist'
    weights = load_and_convert_coco_model(args)

    # Binary mode ('wb') is required for pickle output on Python 3.
    with open(args.out_file_name, 'wb') as f:
        pickle.dump(weights, f, protocol=pickle.HIGHEST_PROTOCOL)
    print('Wrote blobs to {}:'.format(args.out_file_name))
    print(sorted(weights['blobs'].keys()))
def evaluate_masks(
    json_dataset,
    all_boxes,
    all_segms,
    output_dir,
    use_salt=True,
    cleanup=False
):
    """Evaluate instance segmentation results with the Cityscapes toolkit.

    Writes, for every image, a `<basename>pred.txt` index file plus one PNG
    mask per detection under output_dir/results, then invokes the official
    cityscapesscripts instance-level evaluator on them.

    Args:
        json_dataset: dataset object providing .name, .classes, .get_roidb().
        all_boxes: all_boxes[class][image] -> boxes array; last column is
            the detection score.
        all_segms: all_segms[class][image] -> COCO RLE-encoded masks.
        output_dir: directory for the txt/png result files.
        use_salt: add a uuid to res_file to avoid collisions.
        cleanup: NOTE(review): accepted but never used in this body.

    Returns:
        None (the evaluator prints its own metrics).
    """
    if cfg.CLUSTER.ON_CLUSTER:
        # On the cluster avoid saving these files in the job directory
        output_dir = '/tmp'
    # NOTE(review): res_file is constructed but never written to below —
    # confirm whether it is vestigial.
    res_file = os.path.join(
        output_dir, 'segmentations_' + json_dataset.name + '_results')
    if use_salt:
        res_file += '_{}'.format(str(uuid.uuid4()))
    res_file += '.json'

    results_dir = os.path.join(output_dir, 'results')
    if not os.path.exists(results_dir):
        os.mkdir(results_dir)

    # The Cityscapes scripts locate dataset and results via these env vars.
    os.environ['CITYSCAPES_DATASET'] = DATASETS[json_dataset.name][RAW_DIR]
    os.environ['CITYSCAPES_RESULTS'] = output_dir

    # Load the Cityscapes eval script *after* setting the required env vars,
    # since the script reads their values into global variables (at load time).
    import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \
        as cityscapes_eval

    roidb = json_dataset.get_roidb()
    for i, entry in enumerate(roidb):
        im_name = entry['image']

        basename = os.path.splitext(os.path.basename(im_name))[0]
        txtname = os.path.join(output_dir, basename + 'pred.txt')
        with open(txtname, 'w') as fid_txt:
            if i % 10 == 0:
                logger.info('i: {}: {}'.format(i, basename))
            # Class 0 is background, so start at 1.
            for j in range(1, len(all_segms)):
                clss = json_dataset.classes[j]
                clss_id = cityscapes_eval.name2label[clss].id
                segms = all_segms[j][i]
                boxes = all_boxes[j][i]
                if segms == []:
                    continue
                # Decode RLE into an (H, W, num_dets) binary mask stack.
                masks = mask_util.decode(segms)

                for k in range(boxes.shape[0]):
                    score = boxes[k, -1]
                    mask = masks[:, :, k]
                    # Path written into the txt file is relative to output_dir.
                    pngname = os.path.join(
                        'results',
                        basename + '_' + clss + '_{}.png'.format(k))
                    # write txt
                    fid_txt.write('{} {} {}\n'.format(pngname, clss_id, score))
                    # save mask
                    cv2.imwrite(os.path.join(output_dir, pngname), mask * 255)
    logger.info('Evaluating...')
    cityscapes_eval.main([])
    return None
def get_coco_dataset():
    """Build a stub COCO dataset carrying only the id -> class-name table."""
    coco_class_names = [
        '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
        'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
        'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
        'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
        'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
        'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
        'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
        'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]
    dataset = AttrDict()
    # dict(enumerate(...)) gives the same {index: name} table as the
    # original dict comprehension.
    dataset.classes = dict(enumerate(coco_class_names))
    return dataset
import numpy as np
from torch import nn

from core.config import cfg
from datasets import json_dataset
import roi_data.fast_rcnn
import utils.blob as blob_utils
import utils.fpn as fpn_utils


class CollectAndDistributeFpnRpnProposalsOp(nn.Module):
    """Merge RPN proposals produced at every FPN level, keep the overall
    top-scoring ones, and redistribute each surviving proposal to the FPN
    level it maps to. An anchor at one FPN level may predict an RoI that
    belongs to another level, hence the redistribution.

    Inputs follow standard blob naming:
        [rpn_rois_fpn<min>, ..., rpn_rois_fpn<max>,
         rpn_roi_probs_fpn<min>, ..., rpn_roi_probs_fpn<max>]
    During training the op also consumes ``roidb`` and ``im_info`` and
    returns the full set of Fast R-CNN training blobs (labels, bbox targets,
    weights, ...). During inference it returns
    [rois_fpn<min>, ..., rois_fpn<max>, rois, rois_idx_restore], where
    ``rois_idx_restore`` is the permutation restoring the concatenated
    per-level rois to their pre-distribution order.
    """

    def __init__(self):
        super().__init__()

    def forward(self, inputs, roidb, im_info):
        """
        Args:
            inputs: list of [rpn_rois_fpn2, ..., rpn_rois_fpn6,
                rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6]
            roidb: roidb entries (training only)
            im_info: [[im_height, im_width, im_scale], ...]
        """
        proposals = collect(inputs, self.training)

        if not self.training:
            # Inference uses a light-weight path that skips the data loader.
            return distribute(proposals, None)

        # Training: reuse the data loader code by populating roidb entries
        # on the fly with the RPN proposals.
        scales = im_info.data.numpy()[:, 2]
        # For historical consistency with the original Faster R-CNN
        # implementation we are *not* filtering crowd proposals
        # (crowd_thresh=0). This choice should be investigated in the
        # future (it likely does not matter).
        json_dataset.add_proposals(roidb, proposals, scales, crowd_thresh=0)
        # Compute training labels for the RPN proposals; this also handles
        # distributing the proposals over FPN levels.
        blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
        blobs = dict((name, []) for name in blob_names)
        roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, scales, roidb)
        return blobs


def collect(inputs, is_training):
    """Concatenate per-level RPN proposals and keep the post-NMS top-N by
    objectness score. Returns rois in [[batch_idx, x0, y0, x1, y1], ...].
    """
    settings = cfg['TRAIN' if is_training else 'TEST']
    top_n = int(settings.RPN_POST_NMS_TOP_N * cfg.FPN.RPN_COLLECT_SCALE + 0.5)
    num_levels = cfg.FPN.RPN_MAX_LEVEL - cfg.FPN.RPN_MIN_LEVEL + 1

    # First num_levels entries are rois, the rest are their scores.
    all_rois = np.concatenate(inputs[:num_levels])
    all_scores = np.concatenate(inputs[num_levels:]).squeeze()
    keep = np.argsort(-all_scores)[:top_n]
    return all_rois[keep, :]


def distribute(rois, label_blobs):
    """Route each roi to its FPN level and build the inference output blobs.
    For the output blob order see
    roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=False).
    """
    lvl_min = cfg.FPN.ROI_MIN_LEVEL
    lvl_max = cfg.FPN.ROI_MAX_LEVEL
    levels = fpn_utils.map_rois_to_fpn_levels(rois[:, 1:5], lvl_min, lvl_max)

    # NOTE: rois with negative area (lvls == -1) are intentionally kept; see
    # utils.fpn.add_multilevel_roi_blobs which is similar but annoying to
    # generalize to support this particular case.

    names = roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=False)
    outputs = [None] * len(names)
    outputs[0] = rois

    # One roi blob per FPN level; remember the original index order so the
    # concatenation can be un-permuted downstream.
    order_pieces = []
    for slot, level in enumerate(range(lvl_min, lvl_max + 1), start=1):
        idx = np.where(levels == level)[0]
        outputs[slot] = rois[idx, :]
        order_pieces.append(idx)
    outputs[-1] = np.argsort(np.concatenate(order_pieces)).astype(np.int32)

    return dict(zip(names, outputs))
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------

import numpy as np

# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# >> anchors
#
# anchors =
#       -83   -39   100    56
#      -175   -87   192   104
#      -359  -183   376   200
#       -55   -55    72    72
#      -119  -119   136   136
#      -247  -247   264   264
#       -35   -79    52    96
#       -79  -167    96   184
#      -167  -343   184   360
#
# (The python implementation below produces the same boxes in 0-indexed
# coordinates, i.e. each value shifted by -1.)


def generate_anchors(
    stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)
):
    """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format.

    Anchors are centered on stride / 2, have (approximate) sqrt areas of the
    specified sizes, and aspect ratios as given. Returns an array of shape
    (len(sizes) * len(aspect_ratios), 4).
    """
    # FIX: ``np.float`` was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin ``float`` is the equivalent (float64) dtype.
    return _generate_anchors(
        stride,
        np.array(sizes, dtype=float) / stride,
        np.array(aspect_ratios, dtype=float)
    )


def _generate_anchors(base_size, scales, aspect_ratios):
    """Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.
    """
    anchor = np.array([1, 1, base_size, base_size], dtype=float) - 1
    anchors = _ratio_enum(anchor, aspect_ratios)
    anchors = np.vstack(
        [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])]
    )
    return anchors


def _whctrs(anchor):
    """Return width, height, x center, and y center for an anchor (window)."""
    # +1 because coordinates are inclusive pixel indices.
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows) as (x1, y1, x2, y2).
    """
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack(
        (
            x_ctr - 0.5 * (ws - 1),
            y_ctr - 0.5 * (hs - 1),
            x_ctr + 0.5 * (ws - 1),
            y_ctr + 0.5 * (hs - 1)
        )
    )
    return anchors


def _ratio_enum(anchor, ratios):
    """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    # Keep the area (approximately) constant while changing the aspect ratio;
    # rounding matches the original matlab implementation.
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


def _scale_enum(anchor, scales):
    """Enumerate a set of anchors for each scale wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors
class GenerateProposalLabelsOp(nn.Module):
    """Turn RPN proposals into Fast/Mask R-CNN training labels. Used when
    RPN is trained jointly with Fast/Mask R-CNN (end-to-end Faster R-CNN).
    """

    def __init__(self):
        super().__init__()

    def forward(self, rpn_rois, roidb, im_info):
        """Label RPN proposals for training.

        Args:
            rpn_rois: 2D tensor of RPN proposals output by GenerateProposals
            roidb: roidb entries that will be labeled
            im_info: see GenerateProposals doc; column 2 is the image scale

        Returns:
            dict of whatever blobs the data loader declares it needs for
            training (queried via get_fast_rcnn_blob_names).
        """
        scales = im_info.data.numpy()[:, 2]

        blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
        # For historical consistency with the original Faster R-CNN
        # implementation we are *not* filtering crowd proposals:
        # crowd_thresh=0 skips _filter_crowd_proposals entirely. This choice
        # should be investigated in the future (it likely does not matter).
        json_dataset.add_proposals(roidb, rpn_rois, scales, crowd_thresh=0)
        blobs = dict((name, []) for name in blob_names)
        roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, scales, roidb)

        return blobs
"""Functional interface and Caffe2-style parameter initializers."""

import math
import operator
from functools import reduce

import torch.nn.init as init


def group_norm(x, num_groups, weight=None, bias=None, eps=1e-5):
    """Group normalization: normalize x over groups of channels.

    x is reshaped to (N, G, -1); each group is centered and scaled by its
    own mean/std, then an optional per-channel affine (weight, bias) is
    applied. Note: uses the tensor's default (unbiased) variance.
    """
    shape = x.shape
    n, c = shape[0], shape[1]
    assert c % num_groups == 0, "input channel dimension must divisible by number of groups"

    grouped = x.view(n, num_groups, -1)
    centered = grouped - grouped.mean(-1, keepdim=True)
    normed = centered / (grouped.var(-1, keepdim=True) + eps).sqrt()
    normed = normed.view(shape)

    if weight is None:
        return normed
    # Broadcast the per-channel affine over all remaining dims.
    view_shape = (1, -1) + (1,) * (len(shape) - 2)
    return normed * weight.view(view_shape) + bias.view(view_shape)


def XavierFill(tensor):
    """Caffe2 XavierFill: uniform in [-sqrt(3 / fan_in), sqrt(3 / fan_in)].

    fan_in is numel / dim0, matching Caffe2's convention. Fills in place and
    returns the tensor.
    """
    fan_in = tensor.numel() / tensor.shape[0]
    bound = math.sqrt(3 / fan_in)
    return init.uniform_(tensor, -bound, bound)


def MSRAFill(tensor):
    """Caffe2 MSRAFill: normal with std sqrt(2 / fan_out).

    fan_out is numel / dim1, matching Caffe2's convention. Fills in place and
    returns the tensor.
    """
    fan_out = tensor.numel() / tensor.shape[1]
    std = math.sqrt(2 / fan_out)
    return init.normal_(tensor, 0, std)
import torch
import torch.nn as nn


class AffineChannel2d(nn.Module):
    """Channel-wise affine transform: y[:, c] = weight[c] * x[:, c] + bias[c].

    weight is initialized uniform in [0, 1), bias to zero.
    """

    def __init__(self, num_features):
        super().__init__()
        self.num_features = num_features
        self.weight = nn.Parameter(torch.Tensor(num_features))
        self.bias = nn.Parameter(torch.Tensor(num_features))
        self.weight.data.uniform_()
        self.bias.data.zero_()

    def forward(self, x):
        # Broadcast the per-channel parameters over (N, C, H, W).
        shape = (1, self.num_features, 1, 1)
        return x * self.weight.view(shape) + self.bias.view(shape)


class GroupNorm(nn.Module):
    """Group normalization layer with an optional per-channel affine."""

    def __init__(self, num_groups, num_channels, eps=1e-5, affine=True):
        super().__init__()
        self.num_groups = num_groups
        self.num_channels = num_channels
        self.eps = eps
        self.affine = affine
        if affine:
            self.weight = nn.Parameter(torch.Tensor(num_channels))
            self.bias = nn.Parameter(torch.Tensor(num_channels))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        # Identity affine at init: weight = 1, bias = 0.
        if self.affine:
            self.weight.data.fill_(1)
            self.bias.data.zero_()

    def forward(self, x):
        # Imported lazily so this module can be loaded without the project's
        # `nn` package path configured; the call semantics are unchanged.
        import nn.functional as myF
        return myF.group_norm(
            x, self.num_groups, self.weight, self.bias, self.eps
        )

    def extra_repr(self):
        return '{}, {}, eps={}, affine={}'.format(
            self.num_groups, self.num_channels, self.eps, self.affine
        )
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


class BilinearInterpolation2d(nn.Module):
    """Bilinear upsampling by an integer factor, implemented as a fixed
    (non-trainable) transposed convolution.

    Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s := up_scale.

    Adapted from the CVPR'15 FCN code.
    See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py
    """
    def __init__(self, in_channels, out_channels, up_scale):
        super().__init__()
        assert in_channels == out_channels
        assert up_scale % 2 == 0, 'Scale should be even'
        # FIX: the original stored this attribute as the misspelled
        # `in_channes`; keep the old name as an alias for back-compat.
        self.in_channels = self.in_channes = in_channels
        self.out_channels = out_channels
        self.up_scale = int(up_scale)
        self.padding = up_scale // 2

        def upsample_filt(size):
            # 2-D bilinear interpolation kernel of the given size.
            factor = (size + 1) // 2
            if size % 2 == 1:
                center = factor - 1
            else:
                center = factor - 0.5
            og = np.ogrid[:size, :size]
            return ((1 - abs(og[0] - center) / factor) *
                    (1 - abs(og[1] - center) / factor))

        kernel_size = up_scale * 2
        bil_filt = upsample_filt(kernel_size)

        # One bilinear filter per channel on the "diagonal" of the weight
        # tensor; channels do not mix.
        kernel = np.zeros(
            (in_channels, out_channels, kernel_size, kernel_size), dtype=np.float32
        )
        kernel[range(in_channels), range(out_channels), :, :] = bil_filt

        self.upconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size,
                                         stride=self.up_scale, padding=self.padding)

        # Weights are a fixed interpolation filter, not learned.
        self.upconv.weight.data.copy_(torch.from_numpy(kernel))
        self.upconv.bias.data.fill_(0)
        self.upconv.weight.requires_grad = False
        self.upconv.bias.requires_grad = False

    def forward(self, x):
        return self.upconv(x)
import torch
import torch.cuda.comm as comm
from torch.autograd import Function


class Broadcast(Function):
    """Autograd function that copies each input tensor to every target GPU
    (via comm.broadcast_coalesced); the backward pass reduce-adds the
    gradients back onto the original input device.
    """

    @staticmethod
    def forward(ctx, target_gpus, *inputs):
        # CPU tensors are rejected: broadcast_coalesced requires CUDA inputs.
        if not all(input.is_cuda for input in inputs):
            raise TypeError('Broadcast function not implemented for CPU tensors')
        ctx.target_gpus = target_gpus
        if len(inputs) == 0:
            return tuple()
        ctx.num_inputs = len(inputs)
        # All inputs are assumed to live on the same device as inputs[0].
        ctx.input_device = inputs[0].get_device()
        outputs = comm.broadcast_coalesced(inputs, ctx.target_gpus)
        # Mark copies of inputs that don't require grad as non-differentiable
        # so autograd skips them ([1:] skips the target_gpus argument).
        non_differentiables = []
        for idx, input_requires_grad in enumerate(ctx.needs_input_grad[1:]):
            if not input_requires_grad:
                for output in outputs:
                    non_differentiables.append(output[idx])
        ctx.mark_non_differentiable(*non_differentiables)
        # Flatten [per-gpu tuples of tensors] into a single flat tuple.
        return tuple([t for tensors in outputs for t in tensors])

    @staticmethod
    def backward(ctx, *grad_outputs):
        # Leading None matches the non-tensor target_gpus argument.
        return (None,) + ReduceAddCoalesced.apply(ctx.input_device, ctx.num_inputs, *grad_outputs)


class ReduceAddCoalesced(Function):
    """Inverse of Broadcast: sums per-GPU gradient groups onto one device."""

    @staticmethod
    def forward(ctx, destination, num_inputs, *grads):
        # grads is a flat sequence of num_gpus * num_inputs tensors; every
        # num_inputs-th tensor marks the start of one GPU's group.
        ctx.target_gpus = [grads[i].get_device() for i in range(0, len(grads), num_inputs)]

        grads = [grads[i:i + num_inputs]
                 for i in range(0, len(grads), num_inputs)]
        return comm.reduce_add_coalesced(grads, destination)

    @staticmethod
    def backward(ctx, *grad_outputs):
        # Two Nones match the non-tensor destination/num_inputs arguments.
        return (None, None,) + Broadcast.apply(ctx.target_gpus, *grad_outputs)


class Gather(Function):
    """Concatenate tensors from several GPUs along `dim` onto one device;
    backward scatters the gradient back using the recorded chunk sizes.
    """

    @staticmethod
    def forward(ctx, target_device, dim, *inputs):
        assert all(map(lambda i: i.is_cuda, inputs))
        ctx.target_device = target_device
        ctx.dim = dim
        # Remember where each chunk came from and how big it was, so
        # backward can undo the concatenation exactly.
        ctx.input_gpus = tuple(map(lambda i: i.get_device(), inputs))
        ctx.input_sizes = tuple(map(lambda i: i.size(ctx.dim), inputs))
        return comm.gather(inputs, ctx.dim, ctx.target_device)

    @staticmethod
    def backward(ctx, grad_output):
        return (None, None) + Scatter.apply(ctx.input_gpus, ctx.input_sizes, ctx.dim, grad_output)


class Scatter(Function):
    """Split a tensor along `dim` and place the chunks on the target GPUs;
    backward gathers the per-chunk gradients back onto the input device.
    """

    @staticmethod
    def forward(ctx, target_gpus, chunk_sizes, dim, input):
        ctx.target_gpus = target_gpus
        ctx.chunk_sizes = chunk_sizes
        ctx.dim = dim
        # -1 encodes "input was on CPU" for the backward Gather.
        ctx.input_device = input.get_device() if input.is_cuda else -1
        streams = None
        if ctx.input_device == -1:
            # Perform CPU to GPU copies in a background stream
            streams = [_get_stream(device) for device in ctx.target_gpus]
        outputs = comm.scatter(input, ctx.target_gpus, ctx.chunk_sizes, ctx.dim, streams)
        # Synchronize with the copy stream
        if streams is not None:
            for i, output in enumerate(outputs):
                with torch.cuda.device(ctx.target_gpus[i]):
                    main_stream = torch.cuda.current_stream()
                    main_stream.wait_stream(streams[i])
                    # Keep the copy alive until the main stream has used it.
                    output.record_stream(main_stream)
        return outputs

    @staticmethod
    def backward(ctx, *grad_output):
        return None, None, None, Gather.apply(ctx.input_device, ctx.dim, *grad_output)


# background streams used for copying
_streams = None


def _get_stream(device):
    """Gets a background stream for copying between CPU and GPU"""
    global _streams
    if device == -1:
        # CPU "device": no CUDA stream applies.
        return None
    if _streams is None:
        # Lazily allocate one slot per visible GPU.
        _streams = [None] * torch.cuda.device_count()
    if _streams[device] is None:
        _streams[device] = torch.cuda.Stream(device)
    return _streams[device]


import threading
import torch
from torch.autograd import Variable


def get_a_var(obj):
    """Return the first Variable found in obj (searching lists/tuples/dicts
    one level of recursion at a time), or None if there is none. Used to
    infer the device of a replica's inputs.
    """
    if isinstance(obj, Variable):
        return obj

    if isinstance(obj, list) or isinstance(obj, tuple):
        results = map(get_a_var, obj)
        for result in results:
            if isinstance(result, Variable):
                return result
    if isinstance(obj, dict):
        results = map(get_a_var, obj.items())
        for result in results:
            if isinstance(result, Variable):
                return result
    return None


def parallel_apply(modules, inputs, kwargs_tup=None, devices=None):
    """Apply each module to its corresponding input (optionally on its own
    device), running the replicas in parallel threads when there is more
    than one. Exceptions raised inside a worker are captured and re-raised
    in the calling thread, preserving per-replica order in the output list.
    """
    assert len(modules) == len(inputs)
    if kwargs_tup is not None:
        assert len(modules) == len(kwargs_tup)
    else:
        kwargs_tup = ({},) * len(modules)
    if devices is not None:
        assert len(modules) == len(devices)
    else:
        devices = [None] * len(modules)

    lock = threading.Lock()
    results = {}

    def _worker(i, module, input, kwargs, results, lock, device=None):
        # Infer the device from the inputs when not given explicitly.
        if device is None:
            device = get_a_var(input).get_device()
        try:
            with torch.cuda.device(device):
                output = module(*input, **kwargs)
            with lock:
                results[i] = output
        except Exception as e:
            # Store the exception; it is re-raised by the caller below so
            # the failure surfaces in the main thread.
            with lock:
                results[i] = e

    if len(modules) > 1:
        threads = [threading.Thread(target=_worker,
                                    args=(i, module, input, kwargs, results, lock, device),
                                    )
                   for i, (module, input, kwargs, device) in
                   enumerate(zip(modules, inputs, kwargs_tup, devices))]

        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
    else:
        # Single replica: run inline, no threading overhead.
        _worker(0, modules[0], inputs[0], kwargs_tup[0], results, lock, devices[0])

    outputs = []
    for i in range(len(inputs)):
        output = results[i]
        if isinstance(output, Exception):
            raise output
        outputs.append(output)
    return outputs


import torch.cuda.comm as comm


def replicate(network, devices):
    """Create one replica of `network` per device: parameters are broadcast
    with autograd support (Broadcast), buffers are plain broadcast copies,
    and the module tree is shallow-copied so each replica's submodule /
    parameter / buffer references point at its own device's copies.
    """
    from ._functions import Broadcast

    devices = tuple(devices)
    num_replicas = len(devices)

    # Broadcast all parameters at once, then regroup them per replica.
    params = list(network.parameters())
    param_indices = {param: idx for idx, param in enumerate(params)}
    param_copies = Broadcast.apply(devices, *params)
    if len(params) > 0:
        param_copies = [param_copies[i:i + len(params)]
                        for i in range(0, len(param_copies), len(params))]

    buffers = list(network.buffers())
    buffer_indices = {buf: idx for idx, buf in enumerate(buffers)}
    buffer_copies = comm.broadcast_coalesced(buffers, devices)

    modules = list(network.modules())
    module_copies = [[] for device in devices]
    module_indices = {}

    # Shallow-copy every module (bypassing __init__ via __new__) so replicas
    # share code but get their own _parameters/_buffers/_modules dicts.
    for i, module in enumerate(modules):
        module_indices[module] = i
        for j in range(num_replicas):
            replica = module.__new__(type(module))
            replica.__dict__ = module.__dict__.copy()
            replica._parameters = replica._parameters.copy()
            replica._buffers = replica._buffers.copy()
            replica._modules = replica._modules.copy()
            module_copies[j].append(replica)

    # Rewire each replica's child-module, parameter, and buffer references
    # to the replica-local copies (keyed by the originals' indices).
    for i, module in enumerate(modules):
        for key, child in module._modules.items():
            if child is None:
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._modules[key] = None
            else:
                module_idx = module_indices[child]
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._modules[key] = module_copies[j][module_idx]
        for key, param in module._parameters.items():
            if param is None:
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._parameters[key] = None
            else:
                param_idx = param_indices[param]
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._parameters[key] = param_copies[j][param_idx]
        for key, buf in module._buffers.items():
            if buf is None:
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._buffers[key] = None
            else:
                buffer_idx = buffer_indices[buf]
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._buffers[key] = buffer_copies[j][buffer_idx]

    # modules[0] is the root network, so element 0 of each copy list is the
    # fully-rewired replica of the whole network.
    return [module_copies[j][0] for j in range(num_replicas)]
import collections
import collections.abc
import re
import numpy as np
import torch
from torch.autograd import Variable
# Guarded so this module can be imported standalone (e.g. by tests); the
# Scatter/Gather functions are only needed on the Variable (GPU) code paths.
try:
    from ._functions import Scatter, Gather
except ImportError:
    Scatter = Gather = None
# FIX: torch._six was removed in torch >= 1.13; fall back to the stdlib
# equivalents it used to define.
try:
    from torch._six import string_classes, int_classes
except ImportError:
    string_classes, int_classes = (str, bytes), int

# Maps numpy dtype names to the corresponding CPU tensor constructors, used
# by gather() when collecting numpy scalars/arrays.
numpy_type_map = {
    'float64': torch.DoubleTensor,
    'float32': torch.FloatTensor,
    'float16': torch.HalfTensor,
    'int64': torch.LongTensor,
    'int32': torch.IntTensor,
    'int16': torch.ShortTensor,
    'int8': torch.CharTensor,
    'uint8': torch.ByteTensor,
}


def scatter(inputs, target_gpus, dim=0):
    r"""
    Slices variables into approximately equal chunks and
    distributes them across given GPUs. Duplicates
    references to objects that are not variables. Does not
    support Tensors.
    """
    def scatter_map(obj):
        if isinstance(obj, Variable):
            return Scatter.apply(target_gpus, None, dim, obj)
        assert not torch.is_tensor(obj), "Tensors not supported in scatter."
        # Containers are scattered element-wise, preserving their structure.
        if isinstance(obj, tuple) and len(obj) > 0:
            return list(zip(*map(scatter_map, obj)))
        if isinstance(obj, list) and len(obj) > 0:
            return list(map(list, zip(*map(scatter_map, obj))))
        if isinstance(obj, dict) and len(obj) > 0:
            return list(map(type(obj), zip(*map(scatter_map, obj.items()))))
        # Non-variable leaves are replicated by reference, once per GPU.
        return [obj for targets in target_gpus]

    # After scatter_map is called, a scatter_map cell will exist. This cell
    # has a reference to the actual function scatter_map, which has references
    # to a closure that has a reference to the scatter_map cell (because the
    # fn is recursive). To avoid this reference cycle, we set the function to
    # None, clearing the cell
    try:
        return scatter_map(inputs)
    finally:
        scatter_map = None


def scatter_kwargs(inputs, kwargs, target_gpus, dim=0):
    r"""Scatter with support for kwargs dictionary.

    Returns (inputs, kwargs) tuples of equal length, padding the shorter one
    with empty tuples/dicts so every replica gets both.
    """
    inputs = scatter(inputs, target_gpus, dim) if inputs else []
    kwargs = scatter(kwargs, target_gpus, dim) if kwargs else []
    if len(inputs) < len(kwargs):
        inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
    elif len(kwargs) < len(inputs):
        kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
    inputs = tuple(inputs)
    kwargs = tuple(kwargs)
    return inputs, kwargs


def gather(outputs, target_device, dim=0):
    r"""
    Gathers variables from different GPUs on a specified device
    (-1 means the CPU).
    """
    error_msg = "outputs must contain tensors, numbers, dicts or lists; found {}"

    def gather_map(outputs):
        out = outputs[0]
        elem_type = type(out)
        if isinstance(out, Variable):
            return Gather.apply(target_device, dim, *outputs)
        if out is None:
            return None
        # FIX: the abstract base classes were removed from the `collections`
        # namespace in Python 3.10; use collections.abc instead.
        if isinstance(out, collections.abc.Sequence):
            return type(out)(map(gather_map, zip(*outputs)))
        elif isinstance(out, collections.abc.Mapping):
            return {key: gather_map([d[key] for d in outputs]) for key in out}
        elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
                and elem_type.__name__ != 'string_':
            elem = out
            if elem_type.__name__ == 'ndarray':
                # array of string classes and object
                if re.search('[SaUO]', elem.dtype.str) is not None:
                    raise TypeError(error_msg.format(elem.dtype))

                return Variable(torch.from_numpy(np.concatenate(outputs, dim)))
            if elem.shape == ():  # scalars
                py_type = float if elem.dtype.name.startswith('float') else int
                return Variable(numpy_type_map[elem.dtype.name](list(map(py_type, outputs))))
        elif isinstance(out, int_classes):
            return Variable(torch.LongTensor(outputs))
        elif isinstance(out, float):
            return Variable(torch.DoubleTensor(outputs))
        elif isinstance(out, string_classes):
            return outputs

        raise TypeError((error_msg.format(elem_type)))

    # Recursive function calls like this create reference cycles.
    # Setting the function to None clears the refcycle.
    try:
        return gather_map(outputs)
    finally:
        gather_map = None
# octave and aspect fields are only used on RetinaNet. Octave corresponds to
# the scale of the anchor and aspect denotes which aspect ratio is used in
# the range of aspect ratios.
FieldOfAnchors = namedtuple(
    'FieldOfAnchors', [
        'field_of_anchors', 'num_cell_anchors', 'stride', 'field_size',
        'octave', 'aspect'
    ]
)

# Thread-local cache memoizing get_field_of_anchors results.
_threadlocal_foa = threading.local()


def get_field_of_anchors(
    stride, anchor_sizes, anchor_aspect_ratios, octave=None, aspect=None
):
    """Return the full field of anchors for one FPN level: every cell anchor
    shifted to every position of the (field_size x field_size) grid.

    Results are memoized per thread, keyed on (stride, sizes, ratios).
    NOTE(review): octave/aspect are not part of the cache key — presumably
    distinct octaves always come with distinct sizes; verify for RetinaNet.
    """
    global _threadlocal_foa
    cache = getattr(_threadlocal_foa, 'cache', None)
    if cache is None:
        cache = _threadlocal_foa.cache = {}

    key = str(stride) + str(anchor_sizes) + str(anchor_aspect_ratios)
    if key in cache:
        return cache[key]

    # Anchors at a single feature cell.
    cell_anchors = generate_anchors(
        stride=stride, sizes=anchor_sizes, aspect_ratios=anchor_aspect_ratios
    )
    num_cell_anchors = cell_anchors.shape[0]

    # Grid size: the max training image size rounded up to the coarsest FPN
    # stride, then divided by this level's stride.
    fpn_max_size = cfg.FPN.COARSEST_STRIDE * np.ceil(
        cfg.TRAIN.MAX_SIZE / float(cfg.FPN.COARSEST_STRIDE)
    )
    field_size = int(np.ceil(fpn_max_size / float(stride)))

    # Enumerate all (x, y) shifts on the grid, one row per position, as
    # (x, y, x, y) so they can be added to (x1, y1, x2, y2) boxes.
    coords = np.arange(field_size) * stride
    shift_x, shift_y = np.meshgrid(coords, coords)
    sx = shift_x.ravel()
    sy = shift_y.ravel()
    shifts = np.vstack((sx, sy, sx, sy)).transpose()

    # Broadcast the A cell anchors (1, A, 4) against the K shifts (K, 1, 4)
    # to get all K*A shifted anchors, then flatten to (K*A, 4).
    num_anchors = num_cell_anchors
    num_shifts = shifts.shape[0]
    boxes = (
        cell_anchors.reshape((1, num_anchors, 4)) +
        shifts.reshape((1, num_shifts, 4)).transpose((1, 0, 2))
    )
    boxes = boxes.reshape((num_shifts * num_anchors, 4))

    foa = FieldOfAnchors(
        field_of_anchors=boxes.astype(np.float32),
        num_cell_anchors=num_cell_anchors,
        stride=stride,
        field_size=field_size,
        octave=octave,
        aspect=aspect
    )
    cache[key] = foa
    return foa


def unmap(data, count, inds, fill=0):
    """Unmap a subset of item (data) back to the original set of items (of
    size count): positions listed in inds receive data, all others receive
    fill. Returns data unchanged when inds already covers everything.
    """
    if count == len(inds):
        return data

    if data.ndim == 1:
        ret = np.full((count, ), fill, dtype=data.dtype)
        ret[inds] = data
    else:
        ret = np.full((count, ) + data.shape[1:], fill, dtype=data.dtype)
        ret[inds, :] = data
    return ret


def compute_targets(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    """Compute bounding-box regression targets for an image."""
    deltas = box_utils.bbox_transform_inv(ex_rois, gt_rois, weights)
    return deltas.astype(np.float32, copy=False)
def add_keypoint_rcnn_blobs(blobs, roidb, fg_rois_per_image, fg_inds, im_scale,
                            batch_idx):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    gt_keypoints = roidb['gt_keypoints']

    # For every proposal, look up the keypoints of its assigned gt box and
    # check whether at least one visible keypoint falls inside the proposal.
    kp_gt_inds = gt_inds[roidb['box_to_gt_ind_map']]
    inside = _within_box(gt_keypoints[kp_gt_inds, :, :], roidb['boxes'])
    visible = gt_keypoints[kp_gt_inds, 2, :] > 0
    has_visible_kp = np.sum(np.logical_and(visible, inside), axis=1) > 0
    kp_fg_inds = np.where(
        np.logical_and(roidb['max_overlaps'] >= cfg.TRAIN.FG_THRESH,
                       has_visible_kp))[0]

    # Subsample down to the per-image foreground roi budget.
    kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size)
    if kp_fg_inds.size > kp_fg_rois_per_this_image:
        kp_fg_inds = np.random.choice(
            kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False)

    sampled_fg_rois = roidb['boxes'][kp_fg_inds]
    box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

    # Gather the gt keypoints for each sampled roi; -1 marks "no keypoint".
    num_keypoints = gt_keypoints.shape[2]
    sampled_keypoints = -np.ones(
        (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),
        dtype=gt_keypoints.dtype)
    for row, gt_ind in enumerate(box_to_gt_ind_map):
        if gt_ind >= 0:
            sampled_keypoints[row, :, :] = gt_keypoints[gt_inds[gt_ind], :, :]
            assert np.sum(sampled_keypoints[row, 2, :]) > 0

    heats, weights = keypoint_utils.keypoints_to_heatmap_labels(
        sampled_keypoints, sampled_fg_rois)

    flat_shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS,)
    heats = heats.reshape(flat_shape)
    weights = weights.reshape(flat_shape)

    # Rois enter the network at input-image scale, prefixed with batch index:
    # (batch_idx, x1, y1, x2, y2).
    sampled_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_fg_rois.shape[0], 1))
    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))

    blobs['keypoint_rois'] = sampled_fg_rois
    blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False)
    blobs['keypoint_weights'] = weights


def finalize_keypoint_minibatch(blobs, valid):
    """Finalize the minibatch after blobs for all minibatch images have been
    collated.
    """
    num_visible_keypoints = np.sum(blobs['keypoint_weights'])
    # A minibatch is only valid if it carries more than the configured minimum
    # number of visible keypoints.
    valid = (valid and len(blobs['keypoint_weights']) > 0
             and num_visible_keypoints >
             cfg.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH)
    # Normalizer to use if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False.
    # See modeling.model_builder.add_keypoint_losses
    expected_kps = (cfg.TRAIN.IMS_PER_BATCH * cfg.TRAIN.BATCH_SIZE_PER_IM *
                    cfg.TRAIN.FG_FRACTION * cfg.KRCNN.NUM_KEYPOINTS)
    blobs['keypoint_loss_normalizer'] = np.array(
        num_visible_keypoints / expected_kps, dtype=np.float32)
    return valid


def _within_box(points, boxes):
    """Validate which keypoints are contained inside a given box.

    points: Nx2xK
    boxes: Nx4
    output: NxK
    """
    # Compare each keypoint coordinate against its own box edges; broadcasting
    # expands the per-box edges over the K keypoints.
    x1 = boxes[:, 0, np.newaxis]
    y1 = boxes[:, 1, np.newaxis]
    x2 = boxes[:, 2, np.newaxis]
    y2 = boxes[:, 3, np.newaxis]
    px = points[:, 0, :]
    py = points[:, 1, :]
    return (px >= x1) & (px <= x2) & (py >= y1) & (py <= y2)
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Associate one gt polygon mask with each training roi that has a fg
    # (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) &
                             (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Pair each fg roi with the polygon whose enclosing box overlaps it
        # the most (measured by bbox overlap).
        rois_fg = sampled_boxes[fg_inds]
        overlaps = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_inds = np.argmax(overlaps, axis=1)

        for i, roi_fg in enumerate(rois_fg):
            # Rasterize the portion of the matched polygon lying inside this
            # fg roi to an M x M binary image.
            poly_gt = polys_gt[fg_polys_inds[i]]
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so feed a single bg roi with
        # an all -1 (ignore label) mask, labeled as class 0 (background).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        masks = -blob_utils.ones((1, M**2), int32=True)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg to input-image coordinates and format as
    # (batch_idx, x1, y1, x2, y2).
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks


def _expand_to_class_specific_mask_targets(masks, mask_class_labels):
    """Expand masks from shape (#masks, M ** 2) to (#masks, #classes * M ** 2)
    to encode class specific mask targets. Each mask is written into the slot
    of its class; every other entry stays -1 ("don't care" / ignore label).
    """
    assert masks.shape[0] == mask_class_labels.shape[0]
    M = cfg.MRCNN.RESOLUTION

    mask_targets = -blob_utils.ones(
        (masks.shape[0], cfg.MODEL.NUM_CLASSES * M**2), int32=True)

    for i, label in enumerate(mask_class_labels):
        cls = int(label)
        # A background (cls == 0) instance only happens when an image had no
        # fg samples; its target stays all -1 and is ignored by the loss.
        if cls > 0:
            start = M**2 * cls
            mask_targets[i, start:start + M**2] = masks[i, :]

    return mask_targets
def get_minibatch_blob_names(is_training=True):
    """Return blob names in the order in which they are read by the data
    loader.
    """
    # 'data' holds a batch of N images, each with 3 channels.
    blob_names = ['data']
    if cfg.RPN.RPN_ON:
        # RPN-only or end-to-end Faster R-CNN
        blob_names += roi_data.rpn.get_rpn_blob_names(is_training=is_training)
    elif cfg.RETINANET.RETINANET_ON:
        raise NotImplementedError
    else:
        # Fast R-CNN like models trained on precomputed proposals.
        # NOTE(review): only roi_data.rpn is imported in this module; this
        # branch relies on roi_data.fast_rcnn being imported somewhere else
        # (e.g. roi_data/__init__) — confirm.
        blob_names += roi_data.fast_rcnn.get_fast_rcnn_blob_names(
            is_training=is_training
        )
    return blob_names


def get_minibatch(roidb):
    """Given a roidb, construct a minibatch sampled from it."""
    # Blobs are collected per image into lists and concatenated into single
    # tensors later, so every entry starts out as an empty list.
    blobs = {name: [] for name in get_minibatch_blob_names()}

    # Get the input image blob
    im_blob, im_scales = _get_image_blob(roidb)
    blobs['data'] = im_blob
    if cfg.RPN.RPN_ON:
        # RPN-only or end-to-end Faster/Mask R-CNN
        valid = roi_data.rpn.add_rpn_blobs(blobs, im_scales, roidb)
    elif cfg.RETINANET.RETINANET_ON:
        raise NotImplementedError
    else:
        # Fast R-CNN like models trained on precomputed proposals
        valid = roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)
    return blobs, valid


def _get_image_blob(roidb):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    # Sample one random target scale per image in this batch.
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=num_images)
    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        assert im is not None, \
            'Failed to read image \'{}\''.format(entry['image'])
        # If NOT using opencv to read in images, grayscale images must be
        # expanded to 3 channels and RGB flipped to BGR here.
        if entry['flipped']:
            # Horizontal flip to match the flipped ground-truth annotations.
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = blob_utils.prep_im_for_blob(
            im, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale[0])
        processed_ims.append(im[0])

    # Create a blob to hold the input images [n, c, h, w]
    blob = blob_utils.im_list_to_blob(processed_ims)

    return blob, im_scales
19 | try: 20 | numpy_include = np.get_include() 21 | except AttributeError: 22 | numpy_include = np.get_numpy_include() 23 | 24 | 25 | ext_modules = [ 26 | Extension( 27 | name='utils.cython_bbox', 28 | sources=['utils/cython_bbox.pyx'], 29 | extra_compile_args=['-Wno-cpp'], 30 | include_dirs=[numpy_include] 31 | ), 32 | Extension( 33 | name='utils.cython_nms', 34 | sources=['utils/cython_nms.pyx'], 35 | extra_compile_args=['-Wno-cpp'], 36 | include_dirs=[numpy_include] 37 | ) 38 | ] 39 | 40 | setup( 41 | name='mask_rcnn', 42 | ext_modules=cythonize(ext_modules) 43 | ) 44 | 45 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def get_image_blob(im, target_scale, target_max_size):
    """Convert an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale (float): image scale (target size) / (original size)
        im_info (ndarray): float32 row (height, width, im_scale)
    """
    processed_im, im_scale = prep_im_for_blob(
        im, cfg.PIXEL_MEANS, [target_scale], target_max_size
    )
    blob = im_list_to_blob(processed_im)
    # NOTE: this height and width may be larger than actual scaled input image
    # due to the FPN.COARSEST_STRIDE related padding in im_list_to_blob. We are
    # maintaining this behavior for now to make existing results exactly
    # reproducible (in practice using the true input image height and width
    # yields nearly the same results, but they are sometimes slightly different
    # because predictions near the edge of the image will be pruned more
    # aggressively).
    height, width = blob.shape[2], blob.shape[3]
    im_info = np.hstack((height, width, im_scale))[np.newaxis, :]
    return blob, im_scale, im_info.astype(np.float32)


def im_list_to_blob(ims):
    """Convert a list of images into a network input.

    Assumes images were prepared using prep_im_for_blob or equivalent: i.e.
      - BGR channel order
      - pixel means subtracted
      - resized to the desired input size
      - float32 numpy ndarray format
    Output is a 4D NCHW tensor of the images concatenated along axis 0.
    """
    if not isinstance(ims, list):
        ims = [ims]
    max_shape = get_max_shape([im.shape[:2] for im in ims])

    num_images = len(ims)
    # Zero-pad every image up to the common (max) spatial shape.
    blob = np.zeros(
        (num_images, max_shape[0], max_shape[1], 3), dtype=np.float32)
    for i in range(num_images):
        im = ims[i]
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
    # Move channels (axis 3) to axis 1
    # Axis order will become: (batch elem, channel, height, width)
    channel_swap = (0, 3, 1, 2)
    blob = blob.transpose(channel_swap)
    return blob


def get_max_shape(im_shapes):
    """Calculate max spatial size (h, w) for batching given a list of image
    shapes.
    """
    max_shape = np.array(im_shapes).max(axis=0)
    assert max_shape.size == 2
    # Pad the image so its sides are divisible by the coarsest FPN stride.
    if cfg.FPN.FPN_ON:
        stride = float(cfg.FPN.COARSEST_STRIDE)
        max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride)
        max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride)
    return max_shape


def prep_im_for_blob(im, pixel_means, target_sizes, max_size):
    """Prepare an image for use as a network input blob. Specially:
      - Subtract per-channel pixel mean
      - Convert to float32
      - Rescale to each of the specified target size (capped at max_size)
    Returns a list of transformed images, one for each target size. Also
    returns the scale factors that were used to compute each returned image.
    """
    im = im.astype(np.float32, copy=False)
    im -= pixel_means
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    ims = []
    im_scales = []
    for target_size in target_sizes:
        im_scale = get_target_scale(
            im_size_min, im_size_max, target_size, max_size)
        im_resized = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                                interpolation=cv2.INTER_LINEAR)
        ims.append(im_resized)
        im_scales.append(im_scale)
    return ims, im_scales


def get_im_blob_sizes(im_shape, target_sizes, max_size):
    """Calculate im blob size for multiple target_sizes given original im
    shape.
    """
    im_size_min = np.min(im_shape)
    im_size_max = np.max(im_shape)
    im_sizes = []
    for target_size in target_sizes:
        im_scale = get_target_scale(
            im_size_min, im_size_max, target_size, max_size)
        im_sizes.append(np.round(im_shape * im_scale))
    return np.array(im_sizes)


def get_target_scale(im_size_min, im_size_max, target_size, max_size):
    """Calculate target resize scale: scale the short side to target_size,
    unless that would push the long side past max_size.
    """
    im_scale = float(target_size) / float(im_size_min)
    # Prevent the biggest axis from being more than max_size
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    return im_scale


def zeros(shape, int32=False):
    """Return a blob of all zeros of the given shape with the correct float or
    int data type.
    """
    return np.zeros(shape, dtype=np.int32 if int32 else np.float32)


def ones(shape, int32=False):
    """Return a blob of all ones of the given shape with the correct float or
    int data type.
    """
    return np.ones(shape, dtype=np.int32 if int32 else np.float32)


def serialize(obj):
    """Serialize a Python object using pickle and encode it as an array of
    float32 values so that it can be feed into the workspace. See
    deserialize().
    """
    # FIX: np.fromstring on bytes is deprecated (and dropped in newer numpy);
    # np.frombuffer is the supported equivalent and produces the same values.
    return np.frombuffer(pickle.dumps(obj), dtype=np.uint8).astype(np.float32)


def deserialize(arr):
    """Unserialize a Python object from an array of float32 values fetched
    from a workspace. See serialize().
    """
    return pickle.loads(arr.astype(np.uint8).tobytes())
14 | ############################################################################## 15 | 16 | """A simple attribute dictionary used for representing configuration options.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | 24 | class AttrDict(dict): 25 | 26 | IMMUTABLE = '__immutable__' 27 | 28 | def __init__(self, *args, **kwargs): 29 | super(AttrDict, self).__init__(*args, **kwargs) 30 | self.__dict__[AttrDict.IMMUTABLE] = False 31 | 32 | def __getattr__(self, name): 33 | if name in self.__dict__: 34 | return self.__dict__[name] 35 | elif name in self: 36 | return self[name] 37 | else: 38 | raise AttributeError(name) 39 | 40 | def __setattr__(self, name, value): 41 | if not self.__dict__[AttrDict.IMMUTABLE]: 42 | if name in self.__dict__: 43 | self.__dict__[name] = value 44 | else: 45 | self[name] = value 46 | else: 47 | raise AttributeError( 48 | 'Attempted to set "{}" to "{}", but AttrDict is immutable'. 49 | format(name, value) 50 | ) 51 | 52 | def immutable(self, is_immutable): 53 | """Set immutability to is_immutable and recursively apply the setting 54 | to all nested AttrDicts. 55 | """ 56 | self.__dict__[AttrDict.IMMUTABLE] = is_immutable 57 | # Recursively set immutable state 58 | for v in self.__dict__.values(): 59 | if isinstance(v, AttrDict): 60 | v.immutable(is_immutable) 61 | for v in self.values(): 62 | if isinstance(v, AttrDict): 63 | v.immutable(is_immutable) 64 | 65 | def is_immutable(self): 66 | return self.__dict__[AttrDict.IMMUTABLE] 67 | -------------------------------------------------------------------------------- /lib/utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 
def colormap(rgb=False):
    """Return a (79, 3) float32 array of distinct colors scaled to [0, 255].

    Colors come out in RGB channel order when `rgb` is True, otherwise in
    OpenCV's BGR order.
    """
    palette = [
        [0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125],
        [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933],
        [0.635, 0.078, 0.184], [0.300, 0.300, 0.300], [0.600, 0.600, 0.600],
        [1.000, 0.000, 0.000], [1.000, 0.500, 0.000], [0.749, 0.749, 0.000],
        [0.000, 1.000, 0.000], [0.000, 0.000, 1.000], [0.667, 0.000, 1.000],
        [0.333, 0.333, 0.000], [0.333, 0.667, 0.000], [0.333, 1.000, 0.000],
        [0.667, 0.333, 0.000], [0.667, 0.667, 0.000], [0.667, 1.000, 0.000],
        [1.000, 0.333, 0.000], [1.000, 0.667, 0.000], [1.000, 1.000, 0.000],
        [0.000, 0.333, 0.500], [0.000, 0.667, 0.500], [0.000, 1.000, 0.500],
        [0.333, 0.000, 0.500], [0.333, 0.333, 0.500], [0.333, 0.667, 0.500],
        [0.333, 1.000, 0.500], [0.667, 0.000, 0.500], [0.667, 0.333, 0.500],
        [0.667, 0.667, 0.500], [0.667, 1.000, 0.500], [1.000, 0.000, 0.500],
        [1.000, 0.333, 0.500], [1.000, 0.667, 0.500], [1.000, 1.000, 0.500],
        [0.000, 0.333, 1.000], [0.000, 0.667, 1.000], [0.000, 1.000, 1.000],
        [0.333, 0.000, 1.000], [0.333, 0.333, 1.000], [0.333, 0.667, 1.000],
        [0.333, 1.000, 1.000], [0.667, 0.000, 1.000], [0.667, 0.333, 1.000],
        [0.667, 0.667, 1.000], [0.667, 1.000, 1.000], [1.000, 0.000, 1.000],
        [1.000, 0.333, 1.000], [1.000, 0.667, 1.000], [0.167, 0.000, 0.000],
        [0.333, 0.000, 0.000], [0.500, 0.000, 0.000], [0.667, 0.000, 0.000],
        [0.833, 0.000, 0.000], [1.000, 0.000, 0.000], [0.000, 0.167, 0.000],
        [0.000, 0.333, 0.000], [0.000, 0.500, 0.000], [0.000, 0.667, 0.000],
        [0.000, 0.833, 0.000], [0.000, 1.000, 0.000], [0.000, 0.000, 0.167],
        [0.000, 0.000, 0.333], [0.000, 0.000, 0.500], [0.000, 0.000, 0.667],
        [0.000, 0.000, 0.833], [0.000, 0.000, 1.000], [0.000, 0.000, 0.000],
        [0.143, 0.143, 0.143], [0.286, 0.286, 0.286], [0.429, 0.429, 0.429],
        [0.571, 0.571, 0.571], [0.714, 0.714, 0.714], [0.857, 0.857, 0.857],
        [1.000, 1.000, 1.000],
    ]
    color_list = np.array(palette, dtype=np.float32) * 255
    if not rgb:
        # Reverse channels for OpenCV's BGR convention.
        color_list = color_list[:, ::-1]
    return color_list
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_overlaps( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ua = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) + 70 | box_area - iw * ih 71 | ) 72 | overlaps[n, k] = iw * ih / ua 73 | return overlaps 74 | -------------------------------------------------------------------------------- 
/lib/utils/detectron_weight_helper.py: -------------------------------------------------------------------------------- 1 | """Helper functions for loading pretrained weights from Detectron pickle files 2 | """ 3 | 4 | import pickle 5 | import re 6 | import torch 7 | 8 | 9 | def load_detectron_weight(net, detectron_weight_file): 10 | name_mapping, orphan_in_detectron = net.detectron_weight_mapping 11 | 12 | with open(detectron_weight_file, 'rb') as fp: 13 | src_blobs = pickle.load(fp, encoding='latin1') 14 | if 'blobs' in src_blobs: 15 | src_blobs = src_blobs['blobs'] 16 | 17 | params = net.state_dict() 18 | for p_name, p_tensor in params.items(): 19 | d_name = name_mapping[p_name] 20 | if isinstance(d_name, str): # maybe str, None or True 21 | p_tensor.copy_(torch.Tensor(src_blobs[d_name])) 22 | 23 | 24 | def resnet_weights_name_pattern(): 25 | pattern = re.compile(r"conv1_w|conv1_gn_[sb]|res_conv1_.+|res\d+_\d+_.+") 26 | return pattern 27 | 28 | 29 | if __name__ == '__main__': 30 | """Testing""" 31 | from pprint import pprint 32 | import sys 33 | sys.path.insert(0, '..') 34 | from modeling.model_builder import Generalized_RCNN 35 | from core.config import cfg, cfg_from_file 36 | 37 | cfg.MODEL.NUM_CLASSES = 81 38 | cfg_from_file('../../cfgs/res50_mask.yml') 39 | net = Generalized_RCNN() 40 | 41 | # pprint(list(net.state_dict().keys()), width=1) 42 | 43 | mapping, orphans = net.detectron_weight_mapping 44 | state_dict = net.state_dict() 45 | 46 | for k in mapping.keys(): 47 | assert k in state_dict, '%s' % k 48 | 49 | rest = set(state_dict.keys()) - set(mapping.keys()) 50 | assert len(rest) == 0 51 | -------------------------------------------------------------------------------- /lib/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Environment helper functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import os
import sys

# Default value of the CMake install prefix
_CMAKE_INSTALL_PREFIX = '/usr/local'


def get_runtime_dir():
    """Retrieve the path to the runtime directory."""
    # The runtime directory is simply the current working directory of the
    # process that launched the tool.
    return os.getcwd()


def get_py_bin_ext():
    """Retrieve python binary extension."""
    return '.py'


def set_up_matplotlib():
    """Set matplotlib up."""
    # Imported lazily so importing this module never requires matplotlib.
    import matplotlib
    # Use a non-interactive backend
    matplotlib.use('Agg')


def exit_on_error():
    """Exit from a detectron tool when there's an error."""
    sys.exit(1)
--------------------------------------------------------------------------------
/lib/utils/fpn.py:
--------------------------------------------------------------------------------
import numpy as np

import utils.boxes as box_utils
from core.config import cfg


# ---------------------------------------------------------------------------- #
# Helper functions for working with multilevel FPN RoIs
#
---------------------------------------------------------------------------- # 10 | 11 | def map_rois_to_fpn_levels(rois, k_min, k_max): 12 | """Determine which FPN level each RoI in a set of RoIs should map to based 13 | on the heuristic in the FPN paper. 14 | """ 15 | # Compute level ids 16 | areas, neg_idx = box_utils.boxes_area(rois) 17 | areas[neg_idx] = 0 # np.sqrt will remove the entries with negative value 18 | s = np.sqrt(areas) 19 | s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224 20 | lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4 21 | 22 | # Eqn.(1) in FPN paper 23 | target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6)) 24 | target_lvls = np.clip(target_lvls, k_min, k_max) 25 | 26 | # Mark to discard negative area roi. See utils.fpn.add_multilevel_roi_blobs 27 | # target_lvls[neg_idx] = -1 28 | return target_lvls 29 | 30 | 31 | def add_multilevel_roi_blobs( 32 | blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max 33 | ): 34 | """Add RoI blobs for multiple FPN levels to the blobs dict. 35 | 36 | blobs: a dict mapping from blob name to numpy ndarray 37 | blob_prefix: name prefix to use for the FPN blobs 38 | rois: the source rois as a 2D numpy array of shape (N, 5) where each row is 39 | an roi and the columns encode (batch_idx, x1, y1, x2, y2) 40 | target_lvls: numpy array of shape (N, ) indicating which FPN level each roi 41 | in rois should be assigned to. -1 means correspoind roi should be discarded. 
42 | lvl_min: the finest (highest resolution) FPN level (e.g., 2) 43 | lvl_max: the coarest (lowest resolution) FPN level (e.g., 6) 44 | """ 45 | rois_idx_order = np.empty((0, )) 46 | rois_stacked = np.zeros((0, 5), dtype=np.float32) # for assert 47 | # target_lvls = remove_negative_area_roi_blobs(blobs, blob_prefix, rois, target_lvls) 48 | for lvl in range(lvl_min, lvl_max + 1): 49 | idx_lvl = np.where(target_lvls == lvl)[0] 50 | blobs[blob_prefix + '_fpn' + str(lvl)] = rois[idx_lvl, :] 51 | rois_idx_order = np.concatenate((rois_idx_order, idx_lvl)) 52 | rois_stacked = np.vstack( 53 | [rois_stacked, blobs[blob_prefix + '_fpn' + str(lvl)]] 54 | ) 55 | rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, copy=False) 56 | blobs[blob_prefix + '_idx_restore_int32'] = rois_idx_restore 57 | # Sanity check that restore order is correct 58 | assert (rois_stacked[rois_idx_restore] == rois).all() 59 | 60 | 61 | def remove_negative_area_roi_blobs(blobs, blob_prefix, rois, target_lvls): 62 | """ Delete roi entries that have negative area (Uncompleted) """ 63 | idx_neg = np.where(target_lvls == -1)[0] 64 | rois = np.delete(rois, idx_neg, axis=0) 65 | blobs[blob_prefix] = rois 66 | target_lvls = np.delete(target_lvls, idx_neg, axis=0) 67 | #TODO: other blobs in faster_rcnn.get_fast_rcnn_blob_names should also be modified 68 | return target_lvls 69 | -------------------------------------------------------------------------------- /lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Image helper functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cv2
import numpy as np


def aspect_ratio_rel(im, aspect_ratio):
    """Performs width-relative aspect ratio transformation.

    Scales only the width by `aspect_ratio`; the height is kept unchanged.
    """
    im_h, im_w = im.shape[:2]
    im_ar_w = int(round(aspect_ratio * im_w))
    # cv2.resize takes dsize as (width, height)
    im_ar = cv2.resize(im, dsize=(im_ar_w, im_h))
    return im_ar


def aspect_ratio_abs(im, aspect_ratio):
    """Performs absolute aspect ratio transformation.

    Resizes the image so that its w/h equals `aspect_ratio` while the total
    pixel area is (approximately) preserved.
    """
    im_h, im_w = im.shape[:2]
    im_area = im_h * im_w

    # Solve w * h = area and w / h = aspect_ratio for the new dimensions.
    im_ar_w = np.sqrt(im_area * aspect_ratio)
    im_ar_h = np.sqrt(im_area / aspect_ratio)
    assert np.isclose(im_ar_w / im_ar_h, aspect_ratio)

    im_ar = cv2.resize(im, dsize=(int(im_ar_w), int(im_ar_h)))
    return im_ar
--------------------------------------------------------------------------------
/lib/utils/io.py:
--------------------------------------------------------------------------------
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """IO utilities.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | from six.moves import cPickle as pickle 24 | import hashlib 25 | import logging 26 | import os 27 | import re 28 | import sys 29 | try: 30 | from urllib.request import urlopen 31 | except ImportError: #python2 32 | from urllib2 import urlopen 33 | 34 | logger = logging.getLogger(__name__) 35 | 36 | _DETECTRON_S3_BASE_URL = 'https://s3-us-west-2.amazonaws.com/detectron' 37 | 38 | 39 | def save_object(obj, file_name): 40 | """Save a Python object by pickling it.""" 41 | file_name = os.path.abspath(file_name) 42 | with open(file_name, 'wb') as f: 43 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 44 | 45 | 46 | def cache_url(url_or_file, cache_dir): 47 | """Download the file specified by the URL to the cache_dir and return the 48 | path to the cached file. If the argument is not a URL, simply return it as 49 | is. 
50 | """ 51 | is_url = re.match(r'^(?:http)s?://', url_or_file, re.IGNORECASE) is not None 52 | 53 | if not is_url: 54 | return url_or_file 55 | 56 | url = url_or_file 57 | assert url.startswith(_DETECTRON_S3_BASE_URL), \ 58 | ('Detectron only automatically caches URLs in the Detectron S3 ' 59 | 'bucket: {}').format(_DETECTRON_S3_BASE_URL) 60 | 61 | cache_file_path = url.replace(_DETECTRON_S3_BASE_URL, cache_dir) 62 | if os.path.exists(cache_file_path): 63 | assert_cache_file_is_ok(url, cache_file_path) 64 | return cache_file_path 65 | 66 | cache_file_dir = os.path.dirname(cache_file_path) 67 | if not os.path.exists(cache_file_dir): 68 | os.makedirs(cache_file_dir) 69 | 70 | logger.info('Downloading remote file {} to {}'.format(url, cache_file_path)) 71 | download_url(url, cache_file_path) 72 | assert_cache_file_is_ok(url, cache_file_path) 73 | return cache_file_path 74 | 75 | 76 | def assert_cache_file_is_ok(url, file_path): 77 | """Check that cache file has the correct hash.""" 78 | # File is already in the cache, verify that the md5sum matches and 79 | # return local path 80 | cache_file_md5sum = _get_file_md5sum(file_path) 81 | ref_md5sum = _get_reference_md5sum(url) 82 | assert cache_file_md5sum == ref_md5sum, \ 83 | ('Target URL {} appears to be downloaded to the local cache file ' 84 | '{}, but the md5 hash of the local file does not match the ' 85 | 'reference (actual: {} vs. expected: {}). You may wish to delete ' 86 | 'the cached file and try again to trigger automatic ' 87 | 'download.').format(url, file_path, cache_file_md5sum, ref_md5sum) 88 | 89 | 90 | def _progress_bar(count, total): 91 | """Report download progress. 
92 | Credit: 93 | https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113 94 | """ 95 | bar_len = 60 96 | filled_len = int(round(bar_len * count / float(total))) 97 | 98 | percents = round(100.0 * count / float(total), 1) 99 | bar = '=' * filled_len + '-' * (bar_len - filled_len) 100 | 101 | sys.stdout.write( 102 | ' [{}] {}% of {:.1f}MB file \r'. 103 | format(bar, percents, total / 1024 / 1024) 104 | ) 105 | sys.stdout.flush() 106 | if count >= total: 107 | sys.stdout.write('\n') 108 | 109 | 110 | def download_url( 111 | url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar 112 | ): 113 | """Download url and write it to dst_file_path. 114 | Credit: 115 | https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook 116 | """ 117 | response = urlopen(url) 118 | total_size = response.info().getheader('Content-Length').strip() 119 | total_size = int(total_size) 120 | bytes_so_far = 0 121 | 122 | with open(dst_file_path, 'wb') as f: 123 | while 1: 124 | chunk = response.read(chunk_size) 125 | bytes_so_far += len(chunk) 126 | if not chunk: 127 | break 128 | if progress_hook: 129 | progress_hook(bytes_so_far, total_size) 130 | f.write(chunk) 131 | 132 | return bytes_so_far 133 | 134 | 135 | def _get_file_md5sum(file_name): 136 | """Compute the md5 hash of a file.""" 137 | hash_obj = hashlib.md5() 138 | with open(file_name, 'r') as f: 139 | hash_obj.update(f.read()) 140 | return hash_obj.hexdigest() 141 | 142 | 143 | def _get_reference_md5sum(url): 144 | """By convention the md5 hash for url is stored in url + '.md5sum'.""" 145 | url_md5sum = url + '.md5sum' 146 | md5sum = urlopen(url_md5sum).read().strip() 147 | return md5sum 148 | -------------------------------------------------------------------------------- /lib/utils/logging.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Utilities for logging.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | from __future__ import unicode_literals 21 | 22 | from collections import deque 23 | from email.mime.text import MIMEText 24 | import json 25 | import logging 26 | import numpy as np 27 | import smtplib 28 | import sys 29 | 30 | from core.config import cfg 31 | 32 | # Print lower precision floating point values than default FLOAT_REPR 33 | # Note! 
Has no use for json encode with C speedups 34 | json.encoder.FLOAT_REPR = lambda o: format(o, '.6f') 35 | 36 | 37 | def log_json_stats(stats, sort_keys=True): 38 | print('json_stats: {:s}'.format(json.dumps(stats, sort_keys=sort_keys))) 39 | 40 | 41 | def log_stats(stats, misc_args): 42 | """Log training statistics to terminal""" 43 | if hasattr(misc_args, 'epoch'): 44 | lines = "[%s][%s][Epoch %d][Iter %d / %d]\n" % ( 45 | misc_args.run_name, misc_args.cfg_filename, 46 | misc_args.epoch, misc_args.step, misc_args.iters_per_epoch) 47 | else: 48 | lines = "[%s][%s][Step %d / %d]\n" % ( 49 | misc_args.run_name, misc_args.cfg_filename, stats['iter'], cfg.SOLVER.MAX_ITER) 50 | 51 | lines += "\t\tloss: %.6f, lr: %.6f time: %.6f, eta: %s\n" % ( 52 | stats['loss'], stats['lr'], stats['time'], stats['eta'] 53 | ) 54 | if stats['metrics']: 55 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['metrics'].items()) + "\n" 56 | if stats['head_losses']: 57 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['head_losses'].items()) + "\n" 58 | if cfg.RPN.RPN_ON: 59 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['rpn_losses'].items()) + "\n" 60 | if cfg.FPN.FPN_ON: 61 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['rpn_fpn_cls_losses'].items()) + "\n" 62 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['rpn_fpn_bbox_losses'].items()) + "\n" 63 | print(lines[:-1]) # remove last new line 64 | 65 | 66 | class SmoothedValue(object): 67 | """Track a series of values and provide access to smoothed values over a 68 | window or the global series average. 
69 | """ 70 | 71 | def __init__(self, window_size): 72 | self.deque = deque(maxlen=window_size) 73 | self.series = [] 74 | self.total = 0.0 75 | self.count = 0 76 | 77 | def AddValue(self, value): 78 | self.deque.append(value) 79 | self.series.append(value) 80 | self.count += 1 81 | self.total += value 82 | 83 | def GetMedianValue(self): 84 | return np.median(self.deque) 85 | 86 | def GetAverageValue(self): 87 | return np.mean(self.deque) 88 | 89 | def GetGlobalAverageValue(self): 90 | return self.total / self.count 91 | 92 | 93 | def send_email(subject, body, to): 94 | s = smtplib.SMTP('localhost') 95 | mime = MIMEText(body) 96 | mime['Subject'] = subject 97 | mime['To'] = to 98 | s.sendmail('detectron', to, mime.as_string()) 99 | 100 | 101 | def setup_logging(name): 102 | FORMAT = '%(levelname)s %(filename)s:%(lineno)4d: %(message)s' 103 | # Manually clear root loggers to prevent any module that may have called 104 | # logging.basicConfig() from blocking our logging setup 105 | logging.root.handlers = [] 106 | logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout) 107 | logger = logging.getLogger(name) 108 | return logger 109 | -------------------------------------------------------------------------------- /lib/utils/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import socket 3 | from collections import defaultdict, Iterable 4 | from copy import deepcopy 5 | from datetime import datetime 6 | from itertools import chain 7 | 8 | import torch 9 | 10 | from core.config import cfg 11 | 12 | 13 | def get_run_name(): 14 | """ A unique name for each run """ 15 | return datetime.now().strftime( 16 | '%b%d-%H-%M-%S') + '_' + socket.gethostname() 17 | 18 | 19 | def get_output_dir(args, run_name): 20 | """ Get root output directory for each run """ 21 | cfg_filename, _ = os.path.splitext(os.path.split(args.cfg_file)[1]) 22 | return os.path.join(cfg.OUTPUT_DIR, cfg_filename, run_name) 23 | 24 | 
25 | IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm'] 26 | 27 | 28 | def is_image_file(filename): 29 | """Checks if a file is an image. 30 | Args: 31 | filename (string): path to a file 32 | Returns: 33 | bool: True if the filename ends with a known image extension 34 | """ 35 | filename_lower = filename.lower() 36 | return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS) 37 | 38 | 39 | def get_imagelist_from_dir(dirpath): 40 | images = [] 41 | for f in os.listdir(dirpath): 42 | if is_image_file(f): 43 | images.append(os.path.join(dirpath, f)) 44 | return images 45 | 46 | 47 | def ensure_optimizer_ckpt_params_order(param_groups_names, checkpoint): 48 | """Reorder the parameter ids in the SGD optimizer checkpoint to match 49 | the current order in the program, in case parameter insertion order is changed. 50 | """ 51 | assert len(param_groups_names) == len(checkpoint['optimizer']['param_groups']) 52 | param_lens = (len(g) for g in param_groups_names) 53 | saved_lens = (len(g['params']) for g in checkpoint['optimizer']['param_groups']) 54 | if any(p_len != s_len for p_len, s_len in zip(param_lens, saved_lens)): 55 | raise ValueError("loaded state dict contains a parameter group " 56 | "that doesn't match the size of optimizer's group") 57 | 58 | name_to_curpos = {} 59 | for i, p_names in enumerate(param_groups_names): 60 | for j, name in enumerate(p_names): 61 | name_to_curpos[name] = (i, j) 62 | 63 | param_groups_inds = [[] for _ in range(len(param_groups_names))] 64 | cnts = [0] * len(param_groups_names) 65 | for key in checkpoint['model']: 66 | pos = name_to_curpos.get(key) 67 | if pos: 68 | # print(key, pos, cnts[pos[0]]) 69 | saved_p_id = checkpoint['optimizer']['param_groups'][pos[0]]['params'][cnts[pos[0]]] 70 | assert (checkpoint['model'][key].shape == 71 | checkpoint['optimizer']['state'][saved_p_id]['momentum_buffer'].shape), \ 72 | ('param and momentum_buffer shape mismatch in checkpoint.' 
73 | ' param_name: {}, param_id: {}'.format(key, saved_p_id)) 74 | param_groups_inds[pos[0]].append(pos[1]) 75 | cnts[pos[0]] += 1 76 | 77 | for cnt, param_inds in enumerate(param_groups_inds): 78 | ckpt_params = checkpoint['optimizer']['param_groups'][cnt]['params'] 79 | assert len(ckpt_params) == len(param_inds) 80 | ckpt_params = [x for x, _ in sorted(zip(ckpt_params, param_inds), key=lambda x: x[1])] 81 | checkpoint['optimizer']['param_groups'][cnt]['params'] = ckpt_params 82 | 83 | 84 | def load_optimizer_state_dict(optimizer, state_dict): 85 | # deepcopy, to be consistent with module API 86 | state_dict = deepcopy(state_dict) 87 | # Validate the state_dict 88 | groups = optimizer.param_groups 89 | saved_groups = state_dict['param_groups'] 90 | 91 | if len(groups) != len(saved_groups): 92 | raise ValueError("loaded state dict has a different number of " 93 | "parameter groups") 94 | param_lens = (len(g['params']) for g in groups) 95 | saved_lens = (len(g['params']) for g in saved_groups) 96 | if any(p_len != s_len for p_len, s_len in zip(param_lens, saved_lens)): 97 | raise ValueError("loaded state dict contains a parameter group " 98 | "that doesn't match the size of optimizer's group") 99 | 100 | # Update the state 101 | id_map = {old_id: p for old_id, p in 102 | zip(chain(*(g['params'] for g in saved_groups)), 103 | chain(*(g['params'] for g in groups)))} 104 | 105 | def cast(param, value): 106 | """Make a deep copy of value, casting all tensors to device of param.""" 107 | if torch.is_tensor(value): 108 | # Floating-point types are a bit special here. They are the only ones 109 | # that are assumed to always match the type of params. 
110 | if isinstance(param.data, (torch.FloatTensor, torch.cuda.FloatTensor, 111 | torch.DoubleTensor, torch.cuda.DoubleTensor, 112 | torch.HalfTensor, torch.cuda.HalfTensor)): # param.is_floating_point(): 113 | value = value.type_as(param.data) 114 | value = value.cuda(param.get_device()) if param.is_cuda else value.cpu() 115 | return value 116 | elif isinstance(value, dict): 117 | return {k: cast(param, v) for k, v in value.items()} 118 | elif isinstance(value, Iterable): 119 | return type(value)(cast(param, v) for v in value) 120 | else: 121 | return value 122 | 123 | # Copy state assigned to params (and cast tensors to appropriate types). 124 | # State that is not assigned to params is copied as is (needed for 125 | # backward compatibility). 126 | state = defaultdict(dict) 127 | for k, v in state_dict['state'].items(): 128 | if k in id_map: 129 | param = id_map[k] 130 | state[param] = cast(param, v) 131 | else: 132 | state[k] = v 133 | 134 | # Update parameter groups, setting their 'params' value 135 | def update_group(group, new_group): 136 | new_group['params'] = group['params'] 137 | return new_group 138 | param_groups = [ 139 | update_group(g, ng) for g, ng in zip(groups, saved_groups)] 140 | optimizer.__setstate__({'state': state, 'param_groups': param_groups}) 141 | -------------------------------------------------------------------------------- /lib/utils/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | 9 | from core.config import cfg 10 | import nn as mynn 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, beta=1.0): 16 | """ 17 | SmoothL1(x) = 0.5 * x^2 / beta if |x| < beta 18 | |x| - 0.5 * beta otherwise. 
19 | 1 / N * sum_i alpha_out[i] * SmoothL1(alpha_in[i] * (y_hat[i] - y[i])). 20 | N is the number of batch elements in the input predictions 21 | """ 22 | box_diff = bbox_pred - bbox_targets 23 | in_box_diff = bbox_inside_weights * box_diff 24 | abs_in_box_diff = torch.abs(in_box_diff) 25 | smoothL1_sign = (abs_in_box_diff < beta).detach().float() 26 | in_loss_box = smoothL1_sign * 0.5 * torch.pow(in_box_diff, 2) / beta + \ 27 | (1 - smoothL1_sign) * (abs_in_box_diff - (0.5 * beta)) 28 | out_loss_box = bbox_outside_weights * in_loss_box 29 | loss_box = out_loss_box 30 | N = loss_box.size(0) # batch size 31 | loss_box = loss_box.view(-1).sum(0) / N 32 | return loss_box 33 | 34 | 35 | def clip_gradient(model, clip_norm): 36 | """Computes a gradient clipping coefficient based on gradient norm.""" 37 | totalnorm = 0 38 | for p in model.parameters(): 39 | if p.requires_grad: 40 | modulenorm = p.grad.data.norm() 41 | totalnorm += modulenorm ** 2 42 | totalnorm = np.sqrt(totalnorm) 43 | 44 | norm = clip_norm / max(totalnorm, clip_norm) 45 | for p in model.parameters(): 46 | if p.requires_grad: 47 | p.grad.mul_(norm) 48 | 49 | 50 | def decay_learning_rate(optimizer, cur_lr, decay_rate): 51 | """Decay learning rate""" 52 | new_lr = cur_lr * decay_rate 53 | # ratio = _get_lr_change_ratio(cur_lr, new_lr) 54 | ratio = 1 / decay_rate 55 | if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD: 56 | logger.info('Changing learning rate %.6f -> %.6f', cur_lr, new_lr) 57 | # Update learning rate, note that different parameter may have different learning rate 58 | for param_group in optimizer.param_groups: 59 | cur_lr = param_group['lr'] 60 | new_lr = decay_rate * param_group['lr'] 61 | param_group['lr'] = new_lr 62 | if cfg.SOLVER.TYPE in ['SGD']: 63 | if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \ 64 | ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD: 65 | _CorrectMomentum(optimizer, param_group['params'], new_lr / cur_lr) 66 | 67 | def update_learning_rate(optimizer, cur_lr, new_lr): 68 
| """Update learning rate""" 69 | if cur_lr != new_lr: 70 | ratio = _get_lr_change_ratio(cur_lr, new_lr) 71 | if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD: 72 | logger.info('Changing learning rate %.6f -> %.6f', cur_lr, new_lr) 73 | # Update learning rate, note that different parameter may have different learning rate 74 | param_keys = [] 75 | for ind, param_group in enumerate(optimizer.param_groups): 76 | if ind == 1 and cfg.SOLVER.BIAS_DOUBLE_LR: # bias params 77 | param_group['lr'] = new_lr * 2 78 | else: 79 | param_group['lr'] = new_lr 80 | param_keys += param_group['params'] 81 | if cfg.SOLVER.TYPE in ['SGD'] and cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \ 82 | ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD: 83 | _CorrectMomentum(optimizer, param_keys, new_lr / cur_lr) 84 | 85 | 86 | def _CorrectMomentum(optimizer, param_keys, correction): 87 | """The MomentumSGDUpdate op implements the update V as 88 | 89 | V := mu * V + lr * grad, 90 | 91 | where mu is the momentum factor, lr is the learning rate, and grad is 92 | the stochastic gradient. Since V is not defined independently of the 93 | learning rate (as it should ideally be), when the learning rate is 94 | changed we should scale the update history V in order to make it 95 | compatible in scale with lr * grad. 
96 | """ 97 | logger.info('Scaling update history by %.6f (new lr / old lr)', correction) 98 | for p_key in param_keys: 99 | optimizer.state[p_key]['momentum_buffer'] *= correction 100 | 101 | 102 | def _get_lr_change_ratio(cur_lr, new_lr): 103 | eps = 1e-10 104 | ratio = np.max( 105 | (new_lr / np.max((cur_lr, eps)), cur_lr / np.max((new_lr, eps))) 106 | ) 107 | return ratio 108 | 109 | 110 | def affine_grid_gen(rois, input_size, grid_size): 111 | 112 | rois = rois.detach() 113 | x1 = rois[:, 1::4] / 16.0 114 | y1 = rois[:, 2::4] / 16.0 115 | x2 = rois[:, 3::4] / 16.0 116 | y2 = rois[:, 4::4] / 16.0 117 | 118 | height = input_size[0] 119 | width = input_size[1] 120 | 121 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 122 | theta = torch.cat([\ 123 | (x2 - x1) / (width - 1), 124 | zero, 125 | (x1 + x2 - width + 1) / (width - 1), 126 | zero, 127 | (y2 - y1) / (height - 1), 128 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 129 | 130 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, grid_size, grid_size))) 131 | 132 | return grid 133 | 134 | 135 | def save_ckpt(output_dir, args, model, optimizer): 136 | """Save checkpoint""" 137 | if args.no_save: 138 | return 139 | ckpt_dir = os.path.join(output_dir, 'ckpt') 140 | if not os.path.exists(ckpt_dir): 141 | os.makedirs(ckpt_dir) 142 | save_name = os.path.join(ckpt_dir, 'model_{}_{}.pth'.format(args.epoch, args.step)) 143 | if isinstance(model, mynn.DataParallel): 144 | model = model.module 145 | # TODO: (maybe) Do not save redundant shared params 146 | # model_state_dict = model.state_dict() 147 | torch.save({ 148 | 'epoch': args.epoch, 149 | 'step': args.step, 150 | 'iters_per_epoch': args.iters_per_epoch, 151 | 'model': model.state_dict(), 152 | 'optimizer': optimizer.state_dict()}, save_name) 153 | logger.info('save model: %s', save_name) 154 | 155 | 156 | def load_ckpt(model, ckpt): 157 | """Load checkpoint""" 158 | mapping, _ = model.detectron_weight_mapping 159 | state_dict = {} 160 
| for name in ckpt: 161 | if mapping[name]: 162 | state_dict[name] = ckpt[name] 163 | model.load_state_dict(state_dict, strict=False) 164 | 165 | 166 | def get_group_gn(dim): 167 | """ 168 | get number of groups used by GroupNorm, based on number of channels 169 | """ 170 | dim_per_gp = cfg.GROUP_NORM.DIM_PER_GP 171 | num_groups = cfg.GROUP_NORM.NUM_GROUPS 172 | 173 | assert dim_per_gp == -1 or num_groups == -1, \ 174 | "GroupNorm: can only specify G or C/G." 175 | 176 | if dim_per_gp > 0: 177 | assert dim % dim_per_gp == 0 178 | group_gn = dim // dim_per_gp 179 | else: 180 | assert dim % num_groups == 0 181 | group_gn = num_groups 182 | return group_gn 183 | -------------------------------------------------------------------------------- /lib/utils/net_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | import torchvision.models as models 7 | from core.config import cfg 8 | from model.roi_crop.functions.roi_crop import RoICropFunction 9 | import cv2 10 | import pdb 11 | import random 12 | 13 | def save_net(fname, net): 14 | import h5py 15 | h5f = h5py.File(fname, mode='w') 16 | for k, v in net.state_dict().items(): 17 | h5f.create_dataset(k, data=v.cpu().numpy()) 18 | 19 | def load_net(fname, net): 20 | import h5py 21 | h5f = h5py.File(fname, mode='r') 22 | for k, v in net.state_dict().items(): 23 | param = torch.from_numpy(np.asarray(h5f[k])) 24 | v.copy_(param) 25 | 26 | def weights_normal_init(model, dev=0.01): 27 | if isinstance(model, list): 28 | for m in model: 29 | weights_normal_init(m, dev) 30 | else: 31 | for m in model.modules(): 32 | if isinstance(m, nn.Conv2d): 33 | m.weight.data.normal_(0.0, dev) 34 | elif isinstance(m, nn.Linear): 35 | m.weight.data.normal_(0.0, dev) 36 | 37 | 38 | def _crop_pool_layer(bottom, rois, max_pool=True): 39 | # code modified from 40 | # 
https://github.com/ruotianluo/pytorch-faster-rcnn 41 | # implement it using stn 42 | # box to affine 43 | # input (x1,y1,x2,y2) 44 | """ 45 | [ x2-x1 x1 + x2 - W + 1 ] 46 | [ ----- 0 --------------- ] 47 | [ W - 1 W - 1 ] 48 | [ ] 49 | [ y2-y1 y1 + y2 - H + 1 ] 50 | [ 0 ----- --------------- ] 51 | [ H - 1 H - 1 ] 52 | """ 53 | rois = rois.detach() 54 | batch_size = bottom.size(0) 55 | D = bottom.size(1) 56 | H = bottom.size(2) 57 | W = bottom.size(3) 58 | roi_per_batch = rois.size(0) / batch_size 59 | x1 = rois[:, 1::4] / 16.0 60 | y1 = rois[:, 2::4] / 16.0 61 | x2 = rois[:, 3::4] / 16.0 62 | y2 = rois[:, 4::4] / 16.0 63 | 64 | height = bottom.size(2) 65 | width = bottom.size(3) 66 | 67 | # affine theta 68 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 69 | theta = torch.cat([\ 70 | (x2 - x1) / (width - 1), 71 | zero, 72 | (x1 + x2 - width + 1) / (width - 1), 73 | zero, 74 | (y2 - y1) / (height - 1), 75 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 76 | 77 | if max_pool: 78 | pre_pool_size = cfg.POOLING_SIZE * 2 79 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size))) 80 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\ 81 | .contiguous().view(-1, D, H, W) 82 | crops = F.grid_sample(bottom, grid) 83 | crops = F.max_pool2d(crops, 2, 2) 84 | else: 85 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, cfg.POOLING_SIZE, cfg.POOLING_SIZE))) 86 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\ 87 | .contiguous().view(-1, D, H, W) 88 | crops = F.grid_sample(bottom, grid) 89 | 90 | return crops, grid 91 | 92 | def _affine_grid_gen(rois, input_size, grid_size): 93 | 94 | rois = rois.detach() 95 | x1 = rois[:, 1::4] / 16.0 96 | y1 = rois[:, 2::4] / 16.0 97 | x2 = rois[:, 3::4] / 16.0 98 | y2 = rois[:, 4::4] / 16.0 99 | 100 | height = input_size[0] 101 | width = input_size[1] 102 | 103 | zero = 
Variable(rois.data.new(rois.size(0), 1).zero_()) 104 | theta = torch.cat([\ 105 | (x2 - x1) / (width - 1), 106 | zero, 107 | (x1 + x2 - width + 1) / (width - 1), 108 | zero, 109 | (y2 - y1) / (height - 1), 110 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 111 | 112 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, grid_size, grid_size))) 113 | 114 | return grid 115 | 116 | def _affine_theta(rois, input_size): 117 | 118 | rois = rois.detach() 119 | x1 = rois[:, 1::4] / 16.0 120 | y1 = rois[:, 2::4] / 16.0 121 | x2 = rois[:, 3::4] / 16.0 122 | y2 = rois[:, 4::4] / 16.0 123 | 124 | height = input_size[0] 125 | width = input_size[1] 126 | 127 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 128 | 129 | # theta = torch.cat([\ 130 | # (x2 - x1) / (width - 1), 131 | # zero, 132 | # (x1 + x2 - width + 1) / (width - 1), 133 | # zero, 134 | # (y2 - y1) / (height - 1), 135 | # (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 136 | 137 | theta = torch.cat([\ 138 | (y2 - y1) / (height - 1), 139 | zero, 140 | (y1 + y2 - height + 1) / (height - 1), 141 | zero, 142 | (x2 - x1) / (width - 1), 143 | (x1 + x2 - width + 1) / (width - 1)], 1).view(-1, 2, 3) 144 | 145 | return theta 146 | 147 | def compare_grid_sample(): 148 | # do gradcheck 149 | N = random.randint(1, 8) 150 | C = 2 # random.randint(1, 8) 151 | H = 5 # random.randint(1, 8) 152 | W = 4 # random.randint(1, 8) 153 | input = Variable(torch.randn(N, C, H, W).cuda(), requires_grad=True) 154 | input_p = input.clone().data.contiguous() 155 | 156 | grid = Variable(torch.randn(N, H, W, 2).cuda(), requires_grad=True) 157 | grid_clone = grid.clone().contiguous() 158 | 159 | out_offcial = F.grid_sample(input, grid) 160 | grad_outputs = Variable(torch.rand(out_offcial.size()).cuda()) 161 | grad_outputs_clone = grad_outputs.clone().contiguous() 162 | grad_inputs = torch.autograd.grad(out_offcial, (input, grid), grad_outputs.contiguous()) 163 | grad_input_off = grad_inputs[0] 164 | 165 | 166 | 
def load_pretrained_imagenet_weights(model):
    """Load ImageNet-pretrained backbone weights into `model`.

    Two formats are supported, chosen by the extension of
    cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS:
      * '.pkl'  — a Detectron-style pickled blob dict;
      * others  — a torchvision-style state dict, remapped via
                  `convert_state_dict`.

    Args:
        model: the generalized rcnn module to receive the weights.
    """
    _, ext = os.path.splitext(cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS)
    if ext == '.pkl':
        with open(cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS, 'rb') as fp:
            # NOTE(review): pickle.load is unsafe on untrusted files; weight
            # files are assumed to come from a trusted source here.
            src_blobs = pickle.load(fp, encoding='latin1')
        if 'blobs' in src_blobs:
            src_blobs = src_blobs['blobs']
        pretrained_state_dict = src_blobs
    else:
        weights_file = os.path.join(cfg.ROOT_DIR, cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS)
        pretrained_state_dict = convert_state_dict(torch.load(weights_file))

    # Convert batchnorm weights: fold the frozen BN statistics into the
    # affine scale/bias so AffineChannel2d reproduces the BN output.
    for name, mod in model.named_modules():
        if isinstance(mod, mynn.AffineChannel2d):
            # Strip the module-path prefix so the name matches the keys in
            # the pretrained state dict (one extra level when FPN wraps the
            # conv body).
            if cfg.FPN.FPN_ON:
                pretrained_name = name.split('.', 2)[-1]
            else:
                pretrained_name = name.split('.', 1)[-1]
            bn_mean = pretrained_state_dict[pretrained_name + '.running_mean']
            bn_var = pretrained_state_dict[pretrained_name + '.running_var']
            scale = pretrained_state_dict[pretrained_name + '.weight']
            bias = pretrained_state_dict[pretrained_name + '.bias']
            std = torch.sqrt(bn_var + 1e-5)
            new_scale = scale / std
            new_bias = bias - bn_mean * scale / std
            pretrained_state_dict[pretrained_name + '.weight'] = new_scale
            pretrained_state_dict[pretrained_name + '.bias'] = new_bias

    model_state_dict = model.state_dict()

    pattern = dwh.resnet_weights_name_pattern()

    name_mapping, _ = model.detectron_weight_mapping

    for k, v in name_mapping.items():
        if isinstance(v, str):  # maybe a str, None or True
            if pattern.match(v):
                if cfg.FPN.FPN_ON:
                    pretrained_key = k.split('.', 2)[-1]
                else:
                    pretrained_key = k.split('.', 1)[-1]
                # Detectron pkl blobs are keyed by the detectron name `v`;
                # torchvision dicts by the stripped module path.
                if ext == '.pkl':
                    model_state_dict[k].copy_(torch.Tensor(pretrained_state_dict[v]))
                else:
                    model_state_dict[k].copy_(pretrained_state_dict[pretrained_key])
def convert_state_dict(src_dict):
    """Remap torchvision ResNet parameter names to our conv_body/box_head names.

    'layerN.*' becomes 'res{N+1}.*' (the stem is 'res1'), stem parameters
    ('conv1', 'bn1', ...) are prefixed with 'res1.', and the ImageNet
    classifier 'fc.*' weights are dropped.
    """
    dst_dict = {}
    for key, value in src_dict.items():
        if key.startswith('fc'):
            # The ImageNet classification head is unused in detection.
            continue
        parts = key.split('.')
        if key.startswith('layer'):
            # 'layerN' -> 'res{N+1}' since 'res1' is reserved for the stem.
            assert len(parts[0]) == 6
            stage = int(parts[0][5]) + 1
            dst_dict['.'.join(['res%d' % stage] + parts[1:])] = value
        else:
            dst_dict['.'.join(['res1'] + parts)] = value
    return dst_dict
def process_in_parallel(
        tag, total_range_size, binary, output_dir,
        load_ckpt, load_detectron, opts=''):
    """Run the specified binary NUM_GPUS times in parallel, each time as a
    subprocess that uses one GPU. The binary must accept the command line
    arguments `--range {start} {end}` that specify a data processing range.

    Args:
        tag: short string used to name the per-range config/stdout/result files.
        total_range_size: total number of items split across the subprocesses.
        binary: path of the python script to run in each subprocess.
        output_dir: directory receiving the config snapshot, logs and results.
        load_ckpt: checkpoint path forwarded to the binary, or None.
        load_detectron: detectron weights path forwarded to the binary, or None.
        opts: iterable of extra config override tokens passed via `--set`.

    Returns:
        List of unpickled per-range results, ordered by range start.
    """
    # Snapshot the current cfg state in order to pass to the inference
    # subprocesses
    cfg_file = os.path.join(output_dir, '{}_range_config.yaml'.format(tag))
    with open(cfg_file, 'w') as f:
        yaml.dump(cfg, stream=f)
    subprocess_env = os.environ.copy()
    processes = []
    NUM_GPUS = torch.cuda.device_count()
    # Split [0, total_range_size) into NUM_GPUS contiguous sub-ranges.
    subinds = np.array_split(range(total_range_size), NUM_GPUS)
    # Determine GPUs to use
    cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
    if cuda_visible_devices:
        gpu_inds = list(map(int, cuda_visible_devices.split(',')))
        assert -1 not in gpu_inds, \
            'Hiding GPU indices using the \'-1\' index is not supported'
    else:
        gpu_inds = range(cfg.NUM_GPUS)
    gpu_inds = list(gpu_inds)
    # Run the binary in cfg.NUM_GPUS subprocesses
    for i, gpu_ind in enumerate(gpu_inds):
        start = subinds[i][0]
        end = subinds[i][-1] + 1
        # Restrict each child process to a single GPU.
        subprocess_env['CUDA_VISIBLE_DEVICES'] = str(gpu_ind)
        cmd = ('python {binary} --range {start} {end} --cfg {cfg_file} --set {opts} '
               '--output_dir {output_dir}')
        if load_ckpt is not None:
            cmd += ' --load_ckpt {load_ckpt}'
        elif load_detectron is not None:
            cmd += ' --load_detectron {load_detectron}'
        cmd = cmd.format(
            binary=shlex_quote(binary),
            start=int(start),
            end=int(end),
            cfg_file=shlex_quote(cfg_file),
            output_dir=output_dir,
            load_ckpt=load_ckpt,
            load_detectron=load_detectron,
            opts=' '.join([shlex_quote(opt) for opt in opts])
        )
        logger.info('{} range command {}: {}'.format(tag, i, cmd))
        if i == 0:
            # The first subprocess is piped so its output can be streamed
            # live by log_subprocess_output; the rest write to files.
            subprocess_stdout = subprocess.PIPE
        else:
            filename = os.path.join(
                output_dir, '%s_range_%s_%s.stdout' % (tag, start, end)
            )
            subprocess_stdout = open(filename, 'w')
        p = subprocess.Popen(
            cmd,
            shell=True,
            env=subprocess_env,
            stdout=subprocess_stdout,
            stderr=subprocess.STDOUT,
            bufsize=1
        )
        processes.append((i, p, start, end, subprocess_stdout))
    # Log output from inference processes and collate their results
    outputs = []
    for i, p, start, end, subprocess_stdout in processes:
        log_subprocess_output(i, p, output_dir, tag, start, end)
        if isinstance(subprocess_stdout, IOBase):
            subprocess_stdout.close()
        # Each subprocess is expected to have pickled its results here.
        range_file = os.path.join(
            output_dir, '%s_range_%s_%s.pkl' % (tag, start, end)
        )
        range_data = pickle.load(open(range_file, 'rb'))
        outputs.append(range_data)
    return outputs
def log_subprocess_output(i, p, output_dir, tag, start, end):
    """Capture the output of each subprocess and log it in the parent process.

    The first subprocess (i == 0) is streamed in realtime and teed to its
    stdout file; the others are assumed to have written their own files, which
    are dumped in order once each process finishes.
    """
    outfile = os.path.join(
        output_dir, '%s_range_%s_%s.stdout' % (tag, start, end)
    )
    banner = '# ' + '-' * 76 + ' #'
    logger.info(banner)
    logger.info(
        'stdout of subprocess %s with range [%s, %s]' % (i, start + 1, end)
    )
    logger.info(banner)
    if i == 0:
        # Stream the piped stdout from the first subprocess in realtime,
        # echoing each line and saving a copy to disk.
        with open(outfile, 'w') as f:
            line = p.stdout.readline()
            while line != b'':
                print(line.rstrip().decode('ascii'))
                f.write(str(line, encoding='ascii'))
                line = p.stdout.readline()
        p.stdout.close()
        ret = p.wait()
    else:
        # For subprocesses >= 1, wait for completion and dump their log file.
        ret = p.wait()
        with open(outfile, 'r') as f:
            print(f.read())
    assert ret == 0, 'Range subprocess failed (exit code: {})'.format(ret)
class Timer(object):
    """A simple wall-clock timer that accumulates time across tic/toc pairs."""

    def __init__(self):
        self.reset()

    def tic(self):
        # time.time is used rather than time.clock because time.clock does
        # not normalize for multithreading.
        self.start_time = time.time()

    def toc(self, average=True):
        """Stop timing; return the running average (default) or last interval."""
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff

    def reset(self):
        """Zero out all accumulated statistics."""
        self.calls = 0
        self.total_time = self.start_time = self.diff = self.average_time = 0.
def add_path(path):
    """Insert `path` at the front of sys.path if it is not already listed."""
    if path in sys.path:
        return
    sys.path.insert(0, path)

# Project root: the parent of the directory containing this script (tools/).
this_dir = osp.abspath(osp.dirname(osp.dirname(__file__)))

# Add lib to PYTHONPATH
lib_path = osp.join(this_dir, 'lib')
add_path(lib_path)
def get_confirm_token(response):
    """Return Google Drive's download-confirmation token, or None.

    Large files trigger a "can't scan for viruses" interstitial; the token
    required to proceed is delivered in a cookie whose name starts with
    'download_warning'.
    """
    return next(
        (value for key, value in response.cookies.items()
         if key.startswith('download_warning')),
        None,
    )
def main():
    """Download each requested pretrained weight file from Google Drive."""
    init()  # colorama init. Only has effect on Windows
    args = parse_args()
    # Create the output directory once, before the loop (it is loop-invariant)
    # and with exist_ok=True to avoid the exists()/makedirs() race.
    os.makedirs(args.output_dir, exist_ok=True)
    for filename in args.targets:
        file_id = PRETRAINED_WEIGHTS[filename]
        destination = os.path.join(args.output_dir, filename)
        download_file_from_google_drive(file_id, destination)
        print('Download {} to {}'.format(filename, destination))
def parse_args():
    """Parse command line arguments for the mask-rcnn demo script."""
    parser = argparse.ArgumentParser(description='Demonstrate mask-rcnn results')
    parser.add_argument(
        '--dataset', required=True,
        help='training dataset')

    parser.add_argument(
        '--cfg', dest='cfg_file', required=True,
        help='optional config file')
    parser.add_argument(
        '--set', dest='set_cfgs',
        help='set config keys, will overwrite config in the cfg_file',
        default=[], nargs='+')

    # action='store_false' means args.cuda defaults to True and is switched
    # off by passing --no_cuda.
    parser.add_argument(
        '--no_cuda', dest='cuda', help='whether use CUDA', action='store_false')

    parser.add_argument('--load_ckpt', help='path of checkpoint to load')
    parser.add_argument(
        '--load_detectron', help='path to the detectron weight pickle file')

    # Exactly one of --image_dir / --images is expected (checked in main()).
    parser.add_argument(
        '--image_dir',
        help='directory to load images for demo')
    parser.add_argument(
        '--images', nargs='+',
        help='images to infer. Must not use with --image_dir')
    parser.add_argument(
        '--output_dir',
        help='directory to save demo results',
        default="infer_outputs")
    # strtobool maps 'y/yes/true/1' -> 1 and 'n/no/false/0' -> 0.
    parser.add_argument(
        '--merge_pdfs', type=distutils.util.strtobool, default=True)

    args = parser.parse_args()

    return args
def main():
    """Run Mask R-CNN inference on the given images and save visualizations."""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    # Exactly one image source must be given: a directory or an explicit list.
    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        # map_location keeps loaded tensors on CPU; the model was already
        # moved to the GPU above.
        checkpoint = torch.load(load_name, map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN, cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True, device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    for i in xrange(num_images):
        print('img', i)
        im = cv2.imread(imglist[i])
        assert im is not None

        timers = defaultdict(Timer)

        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN, im, timers=timers)

        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))
        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            im_name,
            args.output_dir,
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=0.7,
            kp_thresh=2
        )

    if args.merge_pdfs and num_images > 1:
        # Concatenate all per-image PDFs into a single results.pdf using the
        # external `pdfunite` tool (assumed to be on PATH).
        merge_out_path = '{}/results.pdf'.format(args.output_dir)
        if os.path.exists(merge_out_path):
            os.remove(merge_out_path)
        command = "pdfunite {}/*.pdf {}".format(args.output_dir,
                                                merge_out_path)
        subprocess.call(command, shell=True)
def parse_args():
    """Build the command line parser for the test script and parse sys.argv."""
    ap = argparse.ArgumentParser(description='Test a Fast R-CNN network')
    ap.add_argument(
        '--dataset',
        help='training dataset')
    ap.add_argument(
        '--num_classes', dest='num_classes',
        help='Number of classes in your custom dataset',
        default=None, type=int)
    ap.add_argument(
        '--cfg', dest='cfg_file', required=True,
        help='optional config file')
    ap.add_argument(
        '--load_ckpt', help='path of checkpoint to load')
    ap.add_argument(
        '--load_detectron', help='path to the detectron weight pickle file')
    ap.add_argument(
        '--output_dir',
        help='output directory to save the testing results. If not provided, '
             'defaults to [args.load_ckpt|args.load_detectron]/../test.')
    ap.add_argument(
        '--set', dest='set_cfgs',
        help='set config keys, will overwrite config in the cfg_file.'
             ' See lib/core/config.py for all options',
        default=[], nargs='*')
    ap.add_argument(
        '--range',
        help='start (inclusive) and end (exclusive) indices',
        type=int, nargs=2)
    ap.add_argument(
        '--multi-gpu-testing', help='using multiple gpus for inference',
        action='store_true')
    ap.add_argument(
        '--vis', dest='vis', help='visualize detections', action='store_true')
    return ap.parse_args()
if __name__ == '__main__':

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    logger = utils.logging.setup_logging(__name__)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)

    # Either a single visible GPU without --multi-gpu-testing, or several
    # GPUs with it -- never both, never neither.
    assert (torch.cuda.device_count() == 1) ^ bool(args.multi_gpu_testing)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    if args.output_dir is None:
        # Default the output dir to <weights_dir>/../test.
        ckpt_path = args.load_ckpt if args.load_ckpt else args.load_detectron
        args.output_dir = os.path.join(
            os.path.dirname(os.path.dirname(ckpt_path)), 'test')
        logger.info('Automatically set output directory to %s', args.output_dir)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    cfg.VIS = args.vis

    if args.dataset == "custom_dataset" and args.num_classes is None:
        raise ValueError("Need number of classes in your custom dataset to run!")

    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        merge_cfg_from_list(args.set_cfgs)

    # Map the dataset name to its test split and class count in the global cfg.
    if args.dataset == "coco2017":
        cfg.TEST.DATASETS = ('coco_2017_val',)
        cfg.MODEL.NUM_CLASSES = 81
    elif args.dataset == "keypoints_coco2017":
        cfg.TEST.DATASETS = ('keypoints_coco_2017_val',)
        cfg.MODEL.NUM_CLASSES = 2
    elif args.dataset == "voc2007":
        cfg.TEST.DATASETS = ('voc_2007_test',)
        cfg.MODEL.NUM_CLASSES = 21
    elif args.dataset == "custom_dataset":
        cfg.TEST.DATASETS = ('custom_data_test',)
        cfg.MODEL.NUM_CLASSES = args.num_classes
    else:  # For subprocess call
        assert cfg.TEST.DATASETS, 'cfg.TEST.DATASETS shouldn\'t be empty'
    assert_and_infer_cfg()

    logger.info('Testing with config:')
    logger.info(pprint.pformat(cfg))

    # For test_engine.multi_gpu_test_net_on_dataset
    args.test_net_file, _ = os.path.splitext(__file__)
    # manually set args.cuda
    args.cuda = True

    run_inference(
        args,
        ind_range=args.range,
        multi_gpu_testing=args.multi_gpu_testing,
        check_expected_results=True)