├── .github └── issue_template.md ├── .gitignore ├── .pylintrc ├── .travis.yml ├── .vscode └── settings.json ├── BENCHMARK.md ├── LICENSE ├── README.md ├── configs ├── baselines │ ├── e2e_faster_rcnn_R-101-FPN_1x.yaml │ ├── e2e_faster_rcnn_R-101-FPN_2x.yaml │ ├── e2e_faster_rcnn_R-50-C4_1x.yaml │ ├── e2e_faster_rcnn_R-50-C4_2x.yaml │ ├── e2e_faster_rcnn_R-50-FPN_1x.yaml │ ├── e2e_faster_rcnn_R-50-FPN_2x.yaml │ ├── e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml │ ├── e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml │ ├── e2e_keypoint_rcnn_R-101-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_R-101-FPN_s1x.yaml │ ├── e2e_keypoint_rcnn_R-50-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_R-50-FPN_s1x.yaml │ ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml │ ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml │ ├── e2e_mask_rcnn_R-101-C4_2x.yaml │ ├── e2e_mask_rcnn_R-101-FPN_1x.yaml │ ├── e2e_mask_rcnn_R-101-FPN_2x.yaml │ ├── e2e_mask_rcnn_R-50-C4_1x.yaml │ ├── e2e_mask_rcnn_R-50-C4_2x.yaml │ ├── e2e_mask_rcnn_R-50-FPN_1x.yaml │ ├── e2e_mask_rcnn_R-50-FPN_2x.yaml │ ├── e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml │ ├── e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml │ └── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml ├── getting_started │ ├── tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml │ ├── tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml │ ├── tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml │ └── tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml └── gn_baselines │ ├── e2e_mask_rcnn_R-101-FPN_2x_gn.yaml │ ├── e2e_mask_rcnn_R-101-FPN_3x_gn.yaml │ ├── e2e_mask_rcnn_R-50-FPN_2x_gn.yaml │ ├── e2e_mask_rcnn_R-50-FPN_3x_gn.yaml │ ├── scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml │ └── scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml ├── demo ├── 
33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg ├── 33823288584_1d21cf0a26_k-detectron-R50-C4.jpg ├── 33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg ├── 33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg ├── 33823288584_1d21cf0a26_k.jpg ├── convert_pdf2img.sh ├── e2e_mask_rcnn_R-50-C4 │ └── train_from_scratch_epoch1_bs4 │ │ ├── img1.jpg │ │ ├── img2.jpg │ │ ├── img3.jpg │ │ └── img4.jpg ├── img1_keypoints-detectron-R50-FPN.jpg ├── img1_keypoints-pydetectron-R50-FPN.jpg ├── img2_keypoints-detectron-R50-FPN.jpg ├── img2_keypoints-pydetectron-R50-FPN.jpg ├── loss_cmp_of_e2e_faster_rcnn_R-50-FPN_1x.jpg ├── loss_cmp_of_e2e_keypoint_rcnn_R-50-FPN_1x.jpg ├── loss_cmp_of_e2e_mask_rcnn_R-50-FPN_1x.jpg ├── loss_e2e_keypoint_rcnn_R-50-FPN_1x_bs8.jpg ├── loss_e2e_mask_rcnn_R-50-FPN_1x_bs16.jpg ├── loss_e2e_mask_rcnn_R-50-FPN_1x_bs6.jpg ├── sample_images │ ├── img1.jpg │ ├── img2.jpg │ ├── img3.jpg │ └── img4.jpg └── sample_images_keypoints │ ├── img1_keypoints.jpg │ └── img2_keypoints.jpg ├── lib ├── core │ ├── __init__.py │ ├── config.py │ ├── test.py │ └── test_engine.py ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── cityscapes │ │ ├── __init__.py │ │ ├── coco_to_cityscapes_id.py │ │ └── tools │ │ │ ├── convert_cityscapes_to_coco.py │ │ │ └── convert_coco_model_to_cityscapes.py │ ├── cityscapes_json_dataset_evaluator.py │ ├── dataset_catalog.py │ ├── dummy_datasets.py │ ├── json_dataset.py │ ├── json_dataset_evaluator.py │ ├── roidb.py │ ├── task_evaluation.py │ ├── voc_dataset_evaluator.py │ └── voc_eval.py ├── make.sh ├── modeling │ ├── FPN.py │ ├── ResNet.py │ ├── __init__.py │ ├── collect_and_distribute_fpn_rpn_proposals.py │ ├── fast_rcnn_heads.py │ ├── generate_anchors.py │ ├── generate_proposal_labels.py │ ├── generate_proposals.py │ ├── keypoint_rcnn_heads.py │ ├── mask_rcnn_heads.py │ ├── model_builder.py │ └── rpn_heads.py ├── nn │ ├── __init__.py │ ├── functional.py │ ├── init.py │ ├── 
modules │ │ ├── __init__.py │ │ ├── affine.py │ │ ├── normalization.py │ │ └── upsample.py │ └── parallel │ │ ├── __init__.py │ │ ├── _functions.py │ │ ├── data_parallel.py │ │ ├── parallel_apply.py │ │ ├── replicate.py │ │ └── scatter_gather.py ├── roi_data │ ├── __init__.py │ ├── data_utils.py │ ├── fast_rcnn.py │ ├── keypoint_rcnn.py │ ├── loader.py │ ├── mask_rcnn.py │ ├── minibatch.py │ └── rpn.py ├── setup.py └── utils │ ├── __init__.py │ ├── blob.py │ ├── boxes.py │ ├── collections.py │ ├── colormap.py │ ├── cython_bbox.c │ ├── cython_bbox.pyx │ ├── cython_nms.c │ ├── cython_nms.pyx │ ├── detectron_weight_helper.py │ ├── env.py │ ├── fpn.py │ ├── image.py │ ├── io.py │ ├── keypoints.py │ ├── logging.py │ ├── misc.py │ ├── net.py │ ├── net_utils.py │ ├── resnet_weights_helper.py │ ├── segms.py │ ├── subprocess.py │ ├── timer.py │ ├── training_stats.py │ └── vis.py └── tools ├── _init_paths.py ├── download_imagenet_weights.py ├── infer_simple.py ├── pascal_voc_xml2coco_json_converter.py ├── test_net.py ├── train_net.py └── train_net_step.py /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | ## PLEASE FOLLOW THESE INSTRUCTIONS BEFORE POSTING 2 | 1. **Read the README.md thoroughly ! README.md is not a decoration.** 3 | 2. Please search existing *open and closed* issues in case your issue has already been reported 4 | 3. Please try to debug the issue in case you can solve it on your own before posting 5 | 6 | ## After following steps above and agreeing to provide the detailed information requested below, you may continue with posting your issue 7 | (**Delete this line and the text above it.**) 8 | 9 | ### Expected results 10 | 11 | What did you expect to see? 12 | 13 | ### Actual results 14 | 15 | What did you observe instead? 16 | 17 | ### Detailed steps to reproduce 18 | 19 | E.g.: 20 | 21 | ``` 22 | The command that you ran 23 | ``` 24 | 25 | ### System information 26 | 27 | * Operating system: ? 
28 | * CUDA version: ? 29 | * cuDNN version: ? 30 | * GPU models (for all devices if they are not all the same): ? 31 | * python version: ? 32 | * pytorch version: ? 33 | * Anything else that seems relevant: ? 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | *.pyc 3 | *~ 4 | 5 | *.o 6 | *.so 7 | 8 | .ipynb_checkpoints 9 | notebooks/*.pkl 10 | 11 | /Outputs 12 | lib/build 13 | lib/detectron_pytorch.egg-info 14 | 15 | # ------------------------------ 16 | 17 | .vscode/* 18 | !.vscode/settings.json 19 | !.vscode/tasks.json 20 | !.vscode/launch.json 21 | !.vscode/extensions.json 22 | 23 | # General 24 | .DS_Store 25 | .AppleDouble 26 | .LSOverride 27 | 28 | # Icon must end with two \r 29 | Icon 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear in the root of a volume 35 | .DocumentRevisions-V100 36 | .fseventsd 37 | .Spotlight-V100 38 | .TemporaryItems 39 | .Trashes 40 | .VolumeIcon.icns 41 | .com.apple.timemachine.donotpresent 42 | 43 | # Directories potentially created on remote AFP share 44 | .AppleDB 45 | .AppleDesktop 46 | Network Trash Folder 47 | Temporary Items 48 | .apdisk 49 | 50 | *~ 51 | 52 | # temporary files which can be created if a process still has a handle open of a deleted file 53 | .fuse_hidden* 54 | 55 | # KDE directory preferences 56 | .directory 57 | 58 | # Linux trash folder which might appear on any partition or disk 59 | .Trash-* 60 | 61 | # .nfs files are created when an open file is removed but is still being accessed 62 | .nfs* 63 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | extension-pkg-whitelist=numpy,torch,cv2 3 | init-hook="sys.path.insert(0, './tools'); import _init_paths" 4 | 5 | [MESSAGES CONTROL] 6 | 
disable=wrong-import-position 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | group: travis_latest 2 | language: python 3 | cache: pip 4 | python: 5 | - 3.6 6 | #- nightly 7 | #- pypy3 8 | matrix: 9 | allow_failures: 10 | - python: nightly 11 | - python: pypy3 12 | install: 13 | #- pip install -r requirements.txt 14 | - pip install flake8 # pytest # add another testing frameworks later 15 | before_script: 16 | # stop the build if there are Python syntax errors or undefined names 17 | - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics 18 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 19 | - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 20 | script: 21 | - true # pytest --capture=sys # add other tests here 22 | notifications: 23 | on_success: change 24 | on_failure: change # `always` will be the setting once code changes slow down 25 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintEnabled": true, 3 | "python.linting.flake8Enabled": false, 4 | "python.autoComplete.extraPaths": ["${workspaceRoot}/lib"], 5 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Roy Tseng 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the 
Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | 36 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | RPN: 17 | SIZES: (32, 64, 128, 256, 512) 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | TRAIN: 22 | SCALES: (800,) 23 | MAX_SIZE: 1333 24 | IMS_PER_BATCH: 1 25 | BATCH_SIZE_PER_IM: 512 26 | TEST: 27 | SCALE: 800 28 | MAX_SIZE: 1333 29 | NMS: 0.5 30 | RPN_PRE_NMS_TOP_N: 6000 31 | RPN_POST_NMS_TOP_N: 1000 32 | 
-------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 14 | MAX_ITER: 360000 15 | STEPS: [0, 240000, 320000] 16 | RPN: 17 | SIZES: (32, 64, 128, 256, 512) 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | TRAIN: 22 | SCALES: (800,) 23 | MAX_SIZE: 1333 24 | IMS_PER_BATCH: 1 25 | BATCH_SIZE_PER_IM: 512 26 | TEST: 27 | SCALE: 800 28 | MAX_SIZE: 1333 29 | NMS: 0.5 30 | RPN_PRE_NMS_TOP_N: 6000 31 | RPN_POST_NMS_TOP_N: 1000 32 | 33 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | 
NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: 
bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 360000 13 | STEPS: [0, 240000, 320000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 360000 13 | STEPS: [0, 240000, 320000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: 
bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 
| NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 130000 15 | STEPS: [0, 100000, 120000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | 
KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 130000 15 | STEPS: [0, 100000, 120000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: 
fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | 
USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 130000 13 | STEPS: [0, 100000, 120000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | 
ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | 
NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 130000 13 | STEPS: [0, 100000, 120000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-101-C4_2x.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet101_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 360000 16 | STEPS: [0, 240000, 320000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | RESNETS: 8 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 9 | NUM_GPUS: 8 10 | SOLVER: 11 | WEIGHT_DECAY: 0.0001 12 | LR_POLICY: steps_with_decay 13 | BASE_LR: 0.02 14 | GAMMA: 0.1 15 | MAX_ITER: 90000 16 | STEPS: [0, 60000, 80000] 17 | FPN: 18 | FPN_ON: True 19 | MULTILEVEL_ROIS: True 20 | MULTILEVEL_RPN: True 21 | FAST_RCNN: 22 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 23 | ROI_XFORM_METHOD: RoIAlign 24 | ROI_XFORM_RESOLUTION: 7 25 | ROI_XFORM_SAMPLING_RATIO: 2 26 | MRCNN: 27 | 
ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 28 | RESOLUTION: 28 # (output mask resolution) default 14 29 | ROI_XFORM_METHOD: RoIAlign 30 | ROI_XFORM_RESOLUTION: 14 # default 7 31 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 32 | DILATION: 1 # default 2 33 | CONV_INIT: MSRAFill # default GaussianFill 34 | TRAIN: 35 | SCALES: (800,) 36 | MAX_SIZE: 1333 37 | BATCH_SIZE_PER_IM: 512 38 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 39 | TEST: 40 | SCALE: 800 41 | MAX_SIZE: 1333 42 | NMS: 0.5 43 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 44 | RPN_POST_NMS_TOP_N: 1000 45 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | 
RPN_POST_NMS_TOP_N: 1000 44 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 180000 16 | STEPS: [0, 120000, 160000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 360000 16 | STEPS: [0, 240000, 320000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | 
ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | 
-------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 
160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 32 24 | WIDTH_PER_GROUP: 8 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 32 24 | 
WIDTH_PER_GROUP: 8 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 64 24 | WIDTH_PER_GROUP: 4 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | 
ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 64 24 | WIDTH_PER_GROUP: 4 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 
# Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet152_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | # 1.44x schedule (note TRAIN.IMS_PER_BATCH: 1) 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | MAX_ITER: 260000 15 | STEPS: [0, 200000, 240000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | RESNETS: 21 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-152-32x8d-IN5k.pkl' 22 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 23 | TRANS_FUNC: bottleneck_transformation 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (640, 672, 704, 736, 768, 800) # Scale jitter 41 | MAX_SIZE: 1333 42 | IMS_PER_BATCH: 1 43 | BATCH_SIZE_PER_IM: 512 44 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 45 | TEST: 46 | SCALE: 800 47 | MAX_SIZE: 1333 48 | NMS: 0.5 49 | BBOX_VOTE: 50 | ENABLED: True 51 | VOTE_TH: 0.9 52 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 53 | RPN_POST_NMS_TOP_N: 1000 54 | BBOX_AUG: 55 | ENABLED: True 56 | 
SCORE_HEUR: UNION 57 | COORD_HEUR: UNION 58 | H_FLIP: True 59 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 60 | MAX_SIZE: 2000 61 | SCALE_H_FLIP: True 62 | SCALE_SIZE_DEP: False 63 | ASPECT_RATIOS: () 64 | ASPECT_RATIO_H_FLIP: False 65 | MASK_AUG: 66 | ENABLED: True 67 | HEUR: SOFT_AVG 68 | H_FLIP: True 69 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 70 | MAX_SIZE: 2000 71 | SCALE_H_FLIP: True 72 | SCALE_SIZE_DEP: False 73 | ASPECT_RATIOS: () 74 | ASPECT_RATIO_H_FLIP: False 75 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 1 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.0025 12 | GAMMA: 0.1 13 | MAX_ITER: 60000 14 | STEPS: [0, 30000, 40000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 2 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.005 12 | GAMMA: 0.1 13 | MAX_ITER: 30000 14 | STEPS: [0, 15000, 20000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | 53 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 4 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 15000 14 | STEPS: [0, 7500, 10000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 7500 14 | STEPS: [0, 3750, 5000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-101-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this 
is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 270000 13 | STEPS: [0, 210000, 250000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-101-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | 
MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-50-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 
28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 270000 13 | STEPS: [0, 210000, 250000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-50-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | 
ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | LOAD_IMAGENET_PRETRAINED_WEIGHTS: False 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 270000 14 | STEPS: [0, 210000, 250000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | USE_GN: True # Note: use GN on the FPN-specific layers 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | 
SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | LOAD_IMAGENET_PRETRAINED_WEIGHTS: False 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 270000 14 | STEPS: [0, 210000, 250000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | USE_GN: True # Note: use GN on the FPN-specific layers 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | 
MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-detectron-R50-C4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/33823288584_1d21cf0a26_k-detectron-R50-C4.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/33823288584_1d21cf0a26_k.jpg 
#!/bin/bash
# Convert every img*.pdf in a directory (given with -d DIR) to a JPEG of the
# same basename, using ImageMagick's `convert`.

pdfdir=''

while getopts 'd:' flag; do
  case "$flag" in
    d) pdfdir=$OPTARG ;;
  esac
done

# Use a glob instead of parsing `ls` output: $(ls ...) word-splits on
# whitespace (breaks filenames with spaces) and globs its result a second
# time (shellcheck SC2045). Quote all expansions for the same reason.
for pdf in "${pdfdir}"/img*.pdf; do
  # If the glob matched nothing it expands to itself; skip that literal.
  [ -e "$pdf" ] || continue
  fname="${pdf%.*}"
  convert -density 300x300 -quality 95 "$pdf" "${fname}.jpg"
done
https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img4.jpg -------------------------------------------------------------------------------- /demo/img1_keypoints-detectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/img1_keypoints-detectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img1_keypoints-pydetectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/img1_keypoints-pydetectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img2_keypoints-detectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/img2_keypoints-detectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img2_keypoints-pydetectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/img2_keypoints-pydetectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/loss_cmp_of_e2e_faster_rcnn_R-50-FPN_1x.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/loss_cmp_of_e2e_faster_rcnn_R-50-FPN_1x.jpg 
-------------------------------------------------------------------------------- /demo/loss_cmp_of_e2e_keypoint_rcnn_R-50-FPN_1x.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/loss_cmp_of_e2e_keypoint_rcnn_R-50-FPN_1x.jpg -------------------------------------------------------------------------------- /demo/loss_cmp_of_e2e_mask_rcnn_R-50-FPN_1x.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/loss_cmp_of_e2e_mask_rcnn_R-50-FPN_1x.jpg -------------------------------------------------------------------------------- /demo/loss_e2e_keypoint_rcnn_R-50-FPN_1x_bs8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/loss_e2e_keypoint_rcnn_R-50-FPN_1x_bs8.jpg -------------------------------------------------------------------------------- /demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs16.jpg -------------------------------------------------------------------------------- /demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs6.jpg -------------------------------------------------------------------------------- /demo/sample_images/img1.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/sample_images/img1.jpg -------------------------------------------------------------------------------- /demo/sample_images/img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/sample_images/img2.jpg -------------------------------------------------------------------------------- /demo/sample_images/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/sample_images/img3.jpg -------------------------------------------------------------------------------- /demo/sample_images/img4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/sample_images/img4.jpg -------------------------------------------------------------------------------- /demo/sample_images_keypoints/img1_keypoints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/sample_images_keypoints/img1_keypoints.jpg -------------------------------------------------------------------------------- /demo/sample_images_keypoints/img2_keypoints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/demo/sample_images_keypoints/img2_keypoints.jpg -------------------------------------------------------------------------------- 
function VOCopts = get_voc_opts(path)
% GET_VOC_OPTS  Load the PASCAL VOC devkit options struct.
%   Temporarily cd's into the devkit directory `path`, runs VOCinit from its
%   VOCcode/ subdirectory, then restores the original working directory and
%   MATLAB path whether or not initialization succeeds.

tmp = pwd;
cd(path);
try
  addpath('VOCcode');
  VOCinit;
catch
  % Undo path/cwd changes before surfacing the error.
  rmpath('VOCcode');
  cd(tmp);
  error(sprintf('VOCcode directory not found under %s', path));
end
rmpath('VOCcode');
cd(tmp);

function res = voc_eval(path, comp_id, test_set, output_dir)
% VOC_EVAL  Evaluate detection results for every VOC class.
%   Returns a struct array `res` with per-class recall, precision, ap and
%   ap_auc, and prints a per-class / mean AP summary.

VOCopts = get_voc_opts(path);
VOCopts.testset = test_set;

% Evaluate each class independently.
for i = 1:length(VOCopts.classes)
  cls = VOCopts.classes{i};
  res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir);
end

fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
fprintf('Results:\n');
aps = [res(:).ap]';
fprintf('%.1f\n', aps * 100);
fprintf('%.1f\n', mean(aps) * 100);
fprintf('~~~~~~~~~~~~~~~~~~~~\n');

function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
% Evaluate a single class: run VOCevaldet, compute AUC-style AP via xVOCap,
% save a PR curve image and a .mat file with the raw numbers.

test_set = VOCopts.testset;
year = VOCopts.dataset(4:end);

addpath(fullfile(VOCopts.datadir, 'VOCcode'));

% NOTE(review): res_fn is computed but never used below — confirm whether it
% can be removed or was meant to be checked for existence.
res_fn = sprintf(VOCopts.detrespath, comp_id, cls);

recall = [];
prec = [];
ap = 0;
ap_auc = 0;

% VOC <= 2007 test annotations are public; later test sets can only be
% scored remotely, so skip local evaluation for them.
do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
if do_eval
  % Bug in VOCevaldet requires that tic has been called first
  tic;
  [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
  ap_auc = xVOCap(recall, prec);

  % force plot limits
  ylim([0 1]);
  xlim([0 1]);

  print(gcf, '-djpeg', '-r0', ...
      [output_dir '/' cls '_pr.jpg']);
end
fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);

res.recall = recall;
res.prec = prec;
res.ap = ap;
res.ap_auc = ap_auc;

save([output_dir '/' cls '_pr.mat'], ...
    'res', 'recall', 'prec', 'ap', 'ap_auc');

rmpath(fullfile(VOCopts.datadir, 'VOCcode'));

function ap = xVOCap(rec,prec)
% From the PASCAL VOC 2011 devkit
% Computes average precision as the area under the monotonically
% non-increasing precision envelope (the VOC >= 2010 definition).

mrec=[0 ; rec ; 1];
mpre=[0 ; prec ; 0];
% Make precision monotonically decreasing from right to left.
for i=numel(mpre)-1:-1:1
    mpre(i)=max(mpre(i),mpre(i+1));
end
% Sum rectangle areas where recall actually changes.
i=find(mrec(2:end)~=mrec(1:end-1))+1;
ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
# Cityscapes ids: 0 background, 1 bicycle, 2 car, 3 person, 4 train,
# 5 truck, 6 motorcycle, 7 bus, 8 rider.
# COCO ids:       1 person, 2 bicycle, 3 car, 4 motorcycle, 6 bus,
#                 7 train, 8 truck.
# In every table below, -1 means "no COCO counterpart: randomly initialize
# the corresponding weights".


def cityscapes_to_coco(cityscapes_id):
    """Default Cityscapes -> COCO class-id mapping (rider is rand init)."""
    coco_ids = (0, 2, 3, 1, 7, 8, 4, 6, -1)
    return dict(enumerate(coco_ids))[cityscapes_id]


def cityscapes_to_coco_with_rider(cityscapes_id):
    """Like cityscapes_to_coco, but rider borrows COCO's 'person' weights."""
    coco_ids = (0, 2, 3, 1, 7, 8, 4, 6, 1)
    return dict(enumerate(coco_ids))[cityscapes_id]


def cityscapes_to_coco_without_person_rider(cityscapes_id):
    """Mapping that rand-inits both person and rider."""
    coco_ids = (0, 2, 3, -1, 7, 8, 4, 6, -1)
    return dict(enumerate(coco_ids))[cityscapes_id]


def cityscapes_to_coco_all_random(cityscapes_id):
    """Mapping that rand-inits every class, background included."""
    coco_ids = (-1,) * 9
    return dict(enumerate(coco_ids))[cityscapes_id]
def parse_args():
    """Parse CLI arguments; print help and exit if invoked with no args."""
    parser = argparse.ArgumentParser(
        description='Convert a COCO pre-trained model for use with Cityscapes')
    parser.add_argument(
        '--coco_model', dest='coco_model_file_name',
        help='Pretrained network weights file path',
        default=None, type=str)
    parser.add_argument(
        '--convert_func', dest='convert_func',
        help='Blob conversion function',
        default='cityscapes_to_coco', type=str)
    parser.add_argument(
        '--output', dest='out_file_name',
        help='Output file path',
        default=None, type=str)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args


def convert_coco_blobs_to_cityscape_blobs(model_dict, convert_func=None):
    """Convert, in place, every class-sized blob in model_dict['blobs'].

    Args:
        model_dict: dict with a 'blobs' mapping of name -> ndarray.
        convert_func: name of the mapping function in the `cs` module.
            Defaults to the parsed CLI args for backward compatibility
            (the original implementation read the module-level `args`
            global directly, which made the function untestable in
            isolation).
    """
    if convert_func is None:
        convert_func = args.convert_func  # preserve legacy global coupling
    # Replacing values while iterating items() is safe; only key
    # insertion/deletion during iteration is not.
    for k, v in model_dict['blobs'].items():
        if v.shape[0] == NUM_COCO_CLS or v.shape[0] == 4 * NUM_COCO_CLS:
            coco_blob = model_dict['blobs'][k]
            print(
                'Converting COCO blob {} with shape {}'.
                format(k, coco_blob.shape)
            )
            cs_blob = convert_coco_blob_to_cityscapes_blob(
                coco_blob, convert_func
            )
            print(' -> converted shape {}'.format(cs_blob.shape))
            model_dict['blobs'][k] = cs_blob


def convert_coco_blob_to_cityscapes_blob(coco_blob, convert_func):
    """Return a Cityscapes-shaped copy of a COCO class-indexed blob.

    coco_blob has leading dim 81 (cls scores) or 81*4 (bbox regression).
    Rows with a Cityscapes->COCO correspondence are copied from the COCO
    weights; the rest are re-initialized from a Gaussian matched to the
    blob's mean/std.
    """
    # coco blob (81, ...) or (81*4, ...)
    coco_shape = coco_blob.shape
    leading_factor = int(coco_shape[0] / NUM_COCO_CLS)
    tail_shape = list(coco_shape[1:])
    assert leading_factor == 1 or leading_factor == 4

    # Reshape in [num_classes, ...] form for easier manipulations
    coco_blob = coco_blob.reshape([NUM_COCO_CLS, -1] + tail_shape)
    # Default initialization uses Gaussian with mean and std to match the
    # existing parameters
    std = coco_blob.std()
    mean = coco_blob.mean()
    cs_shape = [NUM_CS_CLS] + list(coco_blob.shape[1:])
    cs_blob = (np.random.randn(*cs_shape) * std + mean).astype(np.float32)

    # Replace random parameters with COCO parameters if class mapping exists
    for i in range(NUM_CS_CLS):
        coco_cls_id = getattr(cs, convert_func)(i)
        if coco_cls_id >= 0:  # otherwise ignore (rand init)
            cs_blob[i] = coco_blob[coco_cls_id]

    cs_shape = [NUM_CS_CLS * leading_factor] + tail_shape
    return cs_blob.reshape(cs_shape)


def remove_momentum(model_dict):
    """Drop optimizer momentum blobs (they are useless for fine-tuning).

    Iterate over a snapshot of the keys: deleting entries while iterating
    the live .keys() view raises RuntimeError on Python 3.
    """
    for k in list(model_dict['blobs'].keys()):
        if k.endswith('_momentum'):
            del model_dict['blobs'][k]


def load_and_convert_coco_model(args):
    """Load a pickled COCO model, strip momentum, and remap its class blobs."""
    # Pickle payloads are binary: text mode ('r') breaks under Python 3.
    with open(args.coco_model_file_name, 'rb') as f:
        model_dict = pickle.load(f)
    remove_momentum(model_dict)
    convert_coco_blobs_to_cityscape_blobs(model_dict, args.convert_func)
    return model_dict


if __name__ == '__main__':
    args = parse_args()
    print(args)
    assert os.path.exists(args.coco_model_file_name), \
        'Weights file does not exist'
    weights = load_and_convert_coco_model(args)

    # Binary mode ('wb') is required for pickle output on Python 3.
    with open(args.out_file_name, 'wb') as f:
        pickle.dump(weights, f, protocol=pickle.HIGHEST_PROTOCOL)
    print('Wrote blobs to {}:'.format(args.out_file_name))
    print(sorted(weights['blobs'].keys()))
def evaluate_masks(
    json_dataset,
    all_boxes,
    all_segms,
    output_dir,
    use_salt=True,
    cleanup=False
):
    """Evaluate instance segmentation results with the Cityscapes toolkit.

    Writes, for every image, a `<basename>pred.txt` index file plus one PNG
    mask per detection under output_dir/results, then invokes the official
    cityscapesscripts instance-level evaluator on them.

    Args:
        json_dataset: dataset object providing .name, .classes, .get_roidb().
        all_boxes: all_boxes[class][image] -> boxes array; last column is
            the detection score.
        all_segms: all_segms[class][image] -> COCO RLE-encoded masks.
        output_dir: directory for the txt/png result files.
        use_salt: add a uuid to res_file to avoid collisions.
        cleanup: NOTE(review): accepted but never used in this body.

    Returns:
        None (the evaluator prints its own metrics).
    """
    if cfg.CLUSTER.ON_CLUSTER:
        # On the cluster avoid saving these files in the job directory
        output_dir = '/tmp'
    # NOTE(review): res_file is constructed but never written to below —
    # confirm whether it is vestigial.
    res_file = os.path.join(
        output_dir, 'segmentations_' + json_dataset.name + '_results')
    if use_salt:
        res_file += '_{}'.format(str(uuid.uuid4()))
    res_file += '.json'

    results_dir = os.path.join(output_dir, 'results')
    if not os.path.exists(results_dir):
        os.mkdir(results_dir)

    # The Cityscapes scripts locate dataset and results via these env vars.
    os.environ['CITYSCAPES_DATASET'] = DATASETS[json_dataset.name][RAW_DIR]
    os.environ['CITYSCAPES_RESULTS'] = output_dir

    # Load the Cityscapes eval script *after* setting the required env vars,
    # since the script reads their values into global variables (at load time).
    import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \
        as cityscapes_eval

    roidb = json_dataset.get_roidb()
    for i, entry in enumerate(roidb):
        im_name = entry['image']

        basename = os.path.splitext(os.path.basename(im_name))[0]
        txtname = os.path.join(output_dir, basename + 'pred.txt')
        with open(txtname, 'w') as fid_txt:
            if i % 10 == 0:
                logger.info('i: {}: {}'.format(i, basename))
            # Class 0 is background, so start at 1.
            for j in range(1, len(all_segms)):
                clss = json_dataset.classes[j]
                clss_id = cityscapes_eval.name2label[clss].id
                segms = all_segms[j][i]
                boxes = all_boxes[j][i]
                if segms == []:
                    continue
                # Decode RLE into an (H, W, num_dets) binary mask stack.
                masks = mask_util.decode(segms)

                for k in range(boxes.shape[0]):
                    score = boxes[k, -1]
                    mask = masks[:, :, k]
                    # Path written into the txt file is relative to output_dir.
                    pngname = os.path.join(
                        'results',
                        basename + '_' + clss + '_{}.png'.format(k))
                    # write txt
                    fid_txt.write('{} {} {}\n'.format(pngname, clss_id, score))
                    # save mask
                    cv2.imwrite(os.path.join(output_dir, pngname), mask * 255)
    logger.info('Evaluating...')
    cityscapes_eval.main([])
    return None
def get_coco_dataset():
    """Build a stub COCO dataset carrying only the id -> class-name table."""
    coco_class_names = [
        '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
        'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
        'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
        'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
        'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
        'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
        'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
        'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]
    dataset = AttrDict()
    # dict(enumerate(...)) gives the same {index: name} table as the
    # original dict comprehension.
    dataset.classes = dict(enumerate(coco_class_names))
    return dataset
import numpy as np
from torch import nn

from core.config import cfg
from datasets import json_dataset
import roi_data.fast_rcnn
import utils.blob as blob_utils
import utils.fpn as fpn_utils


class CollectAndDistributeFpnRpnProposalsOp(nn.Module):
    """Merge RPN proposals produced at every FPN level, keep the overall
    top-scoring ones, and redistribute each surviving proposal to the FPN
    level it maps to. An anchor at one FPN level may predict an RoI that
    belongs to another level, hence the redistribution.

    Inputs follow standard blob naming:
        [rpn_rois_fpn<min>, ..., rpn_rois_fpn<max>,
         rpn_roi_probs_fpn<min>, ..., rpn_roi_probs_fpn<max>]
    During training the op also consumes ``roidb`` and ``im_info`` and
    returns the full set of Fast R-CNN training blobs (labels, bbox targets,
    weights, ...). During inference it returns
    [rois_fpn<min>, ..., rois_fpn<max>, rois, rois_idx_restore], where
    ``rois_idx_restore`` is the permutation restoring the concatenated
    per-level rois to their pre-distribution order.
    """

    def __init__(self):
        super().__init__()

    def forward(self, inputs, roidb, im_info):
        """
        Args:
            inputs: list of [rpn_rois_fpn2, ..., rpn_rois_fpn6,
                rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6]
            roidb: roidb entries (training only)
            im_info: [[im_height, im_width, im_scale], ...]
        """
        proposals = collect(inputs, self.training)

        if not self.training:
            # Inference uses a light-weight path that skips the data loader.
            return distribute(proposals, None)

        # Training: reuse the data loader code by populating roidb entries
        # on the fly with the RPN proposals.
        scales = im_info.data.numpy()[:, 2]
        # For historical consistency with the original Faster R-CNN
        # implementation we are *not* filtering crowd proposals
        # (crowd_thresh=0). This choice should be investigated in the
        # future (it likely does not matter).
        json_dataset.add_proposals(roidb, proposals, scales, crowd_thresh=0)
        # Compute training labels for the RPN proposals; this also handles
        # distributing the proposals over FPN levels.
        blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
        blobs = dict((name, []) for name in blob_names)
        roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, scales, roidb)
        return blobs


def collect(inputs, is_training):
    """Concatenate per-level RPN proposals and keep the post-NMS top-N by
    objectness score. Returns rois in [[batch_idx, x0, y0, x1, y1], ...].
    """
    settings = cfg['TRAIN' if is_training else 'TEST']
    top_n = int(settings.RPN_POST_NMS_TOP_N * cfg.FPN.RPN_COLLECT_SCALE + 0.5)
    num_levels = cfg.FPN.RPN_MAX_LEVEL - cfg.FPN.RPN_MIN_LEVEL + 1

    # First num_levels entries are rois, the rest are their scores.
    all_rois = np.concatenate(inputs[:num_levels])
    all_scores = np.concatenate(inputs[num_levels:]).squeeze()
    keep = np.argsort(-all_scores)[:top_n]
    return all_rois[keep, :]


def distribute(rois, label_blobs):
    """Route each roi to its FPN level and build the inference output blobs.
    For the output blob order see
    roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=False).
    """
    lvl_min = cfg.FPN.ROI_MIN_LEVEL
    lvl_max = cfg.FPN.ROI_MAX_LEVEL
    levels = fpn_utils.map_rois_to_fpn_levels(rois[:, 1:5], lvl_min, lvl_max)

    # NOTE: rois with negative area (lvls == -1) are intentionally kept; see
    # utils.fpn.add_multilevel_roi_blobs which is similar but annoying to
    # generalize to support this particular case.

    names = roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=False)
    outputs = [None] * len(names)
    outputs[0] = rois

    # One roi blob per FPN level; remember the original index order so the
    # concatenation can be un-permuted downstream.
    order_pieces = []
    for slot, level in enumerate(range(lvl_min, lvl_max + 1), start=1):
        idx = np.where(levels == level)[0]
        outputs[slot] = rois[idx, :]
        order_pieces.append(idx)
    outputs[-1] = np.argsort(np.concatenate(order_pieces)).astype(np.int32)

    return dict(zip(names, outputs))
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------

import numpy as np

# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# >> anchors
#
# anchors =
#       -83   -39   100    56
#      -175   -87   192   104
#      -359  -183   376   200
#       -55   -55    72    72
#      -119  -119   136   136
#      -247  -247   264   264
#       -35   -79    52    96
#       -79  -167    96   184
#      -167  -343   184   360
#
# (The python implementation below produces the same boxes in 0-indexed
# coordinates, i.e. each value shifted by -1.)


def generate_anchors(
    stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)
):
    """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format.

    Anchors are centered on stride / 2, have (approximate) sqrt areas of the
    specified sizes, and aspect ratios as given. Returns an array of shape
    (len(sizes) * len(aspect_ratios), 4).
    """
    # FIX: ``np.float`` was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin ``float`` is the equivalent (float64) dtype.
    return _generate_anchors(
        stride,
        np.array(sizes, dtype=float) / stride,
        np.array(aspect_ratios, dtype=float)
    )


def _generate_anchors(base_size, scales, aspect_ratios):
    """Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.
    """
    anchor = np.array([1, 1, base_size, base_size], dtype=float) - 1
    anchors = _ratio_enum(anchor, aspect_ratios)
    anchors = np.vstack(
        [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])]
    )
    return anchors


def _whctrs(anchor):
    """Return width, height, x center, and y center for an anchor (window)."""
    # +1 because coordinates are inclusive pixel indices.
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows) as (x1, y1, x2, y2).
    """
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack(
        (
            x_ctr - 0.5 * (ws - 1),
            y_ctr - 0.5 * (hs - 1),
            x_ctr + 0.5 * (ws - 1),
            y_ctr + 0.5 * (hs - 1)
        )
    )
    return anchors


def _ratio_enum(anchor, ratios):
    """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    # Keep the area (approximately) constant while changing the aspect ratio;
    # rounding matches the original matlab implementation.
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


def _scale_enum(anchor, scales):
    """Enumerate a set of anchors for each scale wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors
class GenerateProposalLabelsOp(nn.Module):
    """Turn RPN proposals into Fast/Mask R-CNN training labels. Used when
    RPN is trained jointly with Fast/Mask R-CNN (end-to-end Faster R-CNN).
    """

    def __init__(self):
        super().__init__()

    def forward(self, rpn_rois, roidb, im_info):
        """Label RPN proposals for training.

        Args:
            rpn_rois: 2D tensor of RPN proposals output by GenerateProposals
            roidb: roidb entries that will be labeled
            im_info: see GenerateProposals doc; column 2 is the image scale

        Returns:
            dict of whatever blobs the data loader declares it needs for
            training (queried via get_fast_rcnn_blob_names).
        """
        scales = im_info.data.numpy()[:, 2]

        blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
        # For historical consistency with the original Faster R-CNN
        # implementation we are *not* filtering crowd proposals:
        # crowd_thresh=0 skips _filter_crowd_proposals entirely. This choice
        # should be investigated in the future (it likely does not matter).
        json_dataset.add_proposals(roidb, rpn_rois, scales, crowd_thresh=0)
        blobs = dict((name, []) for name in blob_names)
        roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, scales, roidb)

        return blobs
"""Functional interface and Caffe2-style parameter initializers."""

import math
import operator
from functools import reduce

import torch.nn.init as init


def group_norm(x, num_groups, weight=None, bias=None, eps=1e-5):
    """Group normalization: normalize x over groups of channels.

    x is reshaped to (N, G, -1); each group is centered and scaled by its
    own mean/std, then an optional per-channel affine (weight, bias) is
    applied. Note: uses the tensor's default (unbiased) variance.
    """
    shape = x.shape
    n, c = shape[0], shape[1]
    assert c % num_groups == 0, "input channel dimension must divisible by number of groups"

    grouped = x.view(n, num_groups, -1)
    centered = grouped - grouped.mean(-1, keepdim=True)
    normed = centered / (grouped.var(-1, keepdim=True) + eps).sqrt()
    normed = normed.view(shape)

    if weight is None:
        return normed
    # Broadcast the per-channel affine over all remaining dims.
    view_shape = (1, -1) + (1,) * (len(shape) - 2)
    return normed * weight.view(view_shape) + bias.view(view_shape)


def XavierFill(tensor):
    """Caffe2 XavierFill: uniform in [-sqrt(3 / fan_in), sqrt(3 / fan_in)].

    fan_in is numel / dim0, matching Caffe2's convention. Fills in place and
    returns the tensor.
    """
    fan_in = tensor.numel() / tensor.shape[0]
    bound = math.sqrt(3 / fan_in)
    return init.uniform_(tensor, -bound, bound)


def MSRAFill(tensor):
    """Caffe2 MSRAFill: normal with std sqrt(2 / fan_out).

    fan_out is numel / dim1, matching Caffe2's convention. Fills in place and
    returns the tensor.
    """
    fan_out = tensor.numel() / tensor.shape[1]
    std = math.sqrt(2 / fan_out)
    return init.normal_(tensor, 0, std)
import torch
import torch.nn as nn


class AffineChannel2d(nn.Module):
    """Channel-wise affine transform: y[:, c] = weight[c] * x[:, c] + bias[c].

    weight is initialized uniform in [0, 1), bias to zero.
    """

    def __init__(self, num_features):
        super().__init__()
        self.num_features = num_features
        self.weight = nn.Parameter(torch.Tensor(num_features))
        self.bias = nn.Parameter(torch.Tensor(num_features))
        self.weight.data.uniform_()
        self.bias.data.zero_()

    def forward(self, x):
        # Broadcast the per-channel parameters over (N, C, H, W).
        shape = (1, self.num_features, 1, 1)
        return x * self.weight.view(shape) + self.bias.view(shape)


class GroupNorm(nn.Module):
    """Group normalization layer with an optional per-channel affine."""

    def __init__(self, num_groups, num_channels, eps=1e-5, affine=True):
        super().__init__()
        self.num_groups = num_groups
        self.num_channels = num_channels
        self.eps = eps
        self.affine = affine
        if affine:
            self.weight = nn.Parameter(torch.Tensor(num_channels))
            self.bias = nn.Parameter(torch.Tensor(num_channels))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        # Identity affine at init: weight = 1, bias = 0.
        if self.affine:
            self.weight.data.fill_(1)
            self.bias.data.zero_()

    def forward(self, x):
        # Imported lazily so this module can be loaded without the project's
        # `nn` package path configured; the call semantics are unchanged.
        import nn.functional as myF
        return myF.group_norm(
            x, self.num_groups, self.weight, self.bias, self.eps
        )

    def extra_repr(self):
        return '{}, {}, eps={}, affine={}'.format(
            self.num_groups, self.num_channels, self.eps, self.affine
        )
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


class BilinearInterpolation2d(nn.Module):
    """Bilinear upsampling by an integer factor, implemented as a fixed
    (non-trainable) transposed convolution.

    Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s := up_scale.

    Adapted from the CVPR'15 FCN code.
    See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py
    """
    def __init__(self, in_channels, out_channels, up_scale):
        super().__init__()
        assert in_channels == out_channels
        assert up_scale % 2 == 0, 'Scale should be even'
        # FIX: the original stored this attribute as the misspelled
        # `in_channes`; keep the old name as an alias for back-compat.
        self.in_channels = self.in_channes = in_channels
        self.out_channels = out_channels
        self.up_scale = int(up_scale)
        self.padding = up_scale // 2

        def upsample_filt(size):
            # 2-D bilinear interpolation kernel of the given size.
            factor = (size + 1) // 2
            if size % 2 == 1:
                center = factor - 1
            else:
                center = factor - 0.5
            og = np.ogrid[:size, :size]
            return ((1 - abs(og[0] - center) / factor) *
                    (1 - abs(og[1] - center) / factor))

        kernel_size = up_scale * 2
        bil_filt = upsample_filt(kernel_size)

        # One bilinear filter per channel on the "diagonal" of the weight
        # tensor; channels do not mix.
        kernel = np.zeros(
            (in_channels, out_channels, kernel_size, kernel_size), dtype=np.float32
        )
        kernel[range(in_channels), range(out_channels), :, :] = bil_filt

        self.upconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size,
                                         stride=self.up_scale, padding=self.padding)

        # Weights are a fixed interpolation filter, not learned.
        self.upconv.weight.data.copy_(torch.from_numpy(kernel))
        self.upconv.bias.data.fill_(0)
        self.upconv.weight.requires_grad = False
        self.upconv.bias.requires_grad = False

    def forward(self, x):
        return self.upconv(x)
import torch
import torch.cuda.comm as comm
from torch.autograd import Function


class Broadcast(Function):
    """Autograd function that copies each input tensor to every target GPU
    (via comm.broadcast_coalesced); the backward pass reduce-adds the
    gradients back onto the original input device.
    """

    @staticmethod
    def forward(ctx, target_gpus, *inputs):
        # CPU tensors are rejected: broadcast_coalesced requires CUDA inputs.
        if not all(input.is_cuda for input in inputs):
            raise TypeError('Broadcast function not implemented for CPU tensors')
        ctx.target_gpus = target_gpus
        if len(inputs) == 0:
            return tuple()
        ctx.num_inputs = len(inputs)
        # All inputs are assumed to live on the same device as inputs[0].
        ctx.input_device = inputs[0].get_device()
        outputs = comm.broadcast_coalesced(inputs, ctx.target_gpus)
        # Mark copies of inputs that don't require grad as non-differentiable
        # so autograd skips them ([1:] skips the target_gpus argument).
        non_differentiables = []
        for idx, input_requires_grad in enumerate(ctx.needs_input_grad[1:]):
            if not input_requires_grad:
                for output in outputs:
                    non_differentiables.append(output[idx])
        ctx.mark_non_differentiable(*non_differentiables)
        # Flatten [per-gpu tuples of tensors] into a single flat tuple.
        return tuple([t for tensors in outputs for t in tensors])

    @staticmethod
    def backward(ctx, *grad_outputs):
        # Leading None matches the non-tensor target_gpus argument.
        return (None,) + ReduceAddCoalesced.apply(ctx.input_device, ctx.num_inputs, *grad_outputs)


class ReduceAddCoalesced(Function):
    """Inverse of Broadcast: sums per-GPU gradient groups onto one device."""

    @staticmethod
    def forward(ctx, destination, num_inputs, *grads):
        # grads is a flat sequence of num_gpus * num_inputs tensors; every
        # num_inputs-th tensor marks the start of one GPU's group.
        ctx.target_gpus = [grads[i].get_device() for i in range(0, len(grads), num_inputs)]

        grads = [grads[i:i + num_inputs]
                 for i in range(0, len(grads), num_inputs)]
        return comm.reduce_add_coalesced(grads, destination)

    @staticmethod
    def backward(ctx, *grad_outputs):
        # Two Nones match the non-tensor destination/num_inputs arguments.
        return (None, None,) + Broadcast.apply(ctx.target_gpus, *grad_outputs)


class Gather(Function):
    """Concatenate tensors from several GPUs along `dim` onto one device;
    backward scatters the gradient back using the recorded chunk sizes.
    """

    @staticmethod
    def forward(ctx, target_device, dim, *inputs):
        assert all(map(lambda i: i.is_cuda, inputs))
        ctx.target_device = target_device
        ctx.dim = dim
        # Remember where each chunk came from and how big it was, so
        # backward can undo the concatenation exactly.
        ctx.input_gpus = tuple(map(lambda i: i.get_device(), inputs))
        ctx.input_sizes = tuple(map(lambda i: i.size(ctx.dim), inputs))
        return comm.gather(inputs, ctx.dim, ctx.target_device)

    @staticmethod
    def backward(ctx, grad_output):
        return (None, None) + Scatter.apply(ctx.input_gpus, ctx.input_sizes, ctx.dim, grad_output)


class Scatter(Function):
    """Split a tensor along `dim` and place the chunks on the target GPUs;
    backward gathers the per-chunk gradients back onto the input device.
    """

    @staticmethod
    def forward(ctx, target_gpus, chunk_sizes, dim, input):
        ctx.target_gpus = target_gpus
        ctx.chunk_sizes = chunk_sizes
        ctx.dim = dim
        # -1 encodes "input was on CPU" for the backward Gather.
        ctx.input_device = input.get_device() if input.is_cuda else -1
        streams = None
        if ctx.input_device == -1:
            # Perform CPU to GPU copies in a background stream
            streams = [_get_stream(device) for device in ctx.target_gpus]
        outputs = comm.scatter(input, ctx.target_gpus, ctx.chunk_sizes, ctx.dim, streams)
        # Synchronize with the copy stream
        if streams is not None:
            for i, output in enumerate(outputs):
                with torch.cuda.device(ctx.target_gpus[i]):
                    main_stream = torch.cuda.current_stream()
                    main_stream.wait_stream(streams[i])
                    # Keep the copy alive until the main stream has used it.
                    output.record_stream(main_stream)
        return outputs

    @staticmethod
    def backward(ctx, *grad_output):
        return None, None, None, Gather.apply(ctx.input_device, ctx.dim, *grad_output)


# background streams used for copying
_streams = None


def _get_stream(device):
    """Gets a background stream for copying between CPU and GPU"""
    global _streams
    if device == -1:
        # CPU "device": no CUDA stream applies.
        return None
    if _streams is None:
        # Lazily allocate one slot per visible GPU.
        _streams = [None] * torch.cuda.device_count()
    if _streams[device] is None:
        _streams[device] = torch.cuda.Stream(device)
    return _streams[device]


import threading
import torch
from torch.autograd import Variable


def get_a_var(obj):
    """Return the first Variable found in obj (searching lists/tuples/dicts
    one level of recursion at a time), or None if there is none. Used to
    infer the device of a replica's inputs.
    """
    if isinstance(obj, Variable):
        return obj

    if isinstance(obj, list) or isinstance(obj, tuple):
        results = map(get_a_var, obj)
        for result in results:
            if isinstance(result, Variable):
                return result
    if isinstance(obj, dict):
        results = map(get_a_var, obj.items())
        for result in results:
            if isinstance(result, Variable):
                return result
    return None


def parallel_apply(modules, inputs, kwargs_tup=None, devices=None):
    """Apply each module to its corresponding input (optionally on its own
    device), running the replicas in parallel threads when there is more
    than one. Exceptions raised inside a worker are captured and re-raised
    in the calling thread, preserving per-replica order in the output list.
    """
    assert len(modules) == len(inputs)
    if kwargs_tup is not None:
        assert len(modules) == len(kwargs_tup)
    else:
        kwargs_tup = ({},) * len(modules)
    if devices is not None:
        assert len(modules) == len(devices)
    else:
        devices = [None] * len(modules)

    lock = threading.Lock()
    results = {}

    def _worker(i, module, input, kwargs, results, lock, device=None):
        # Infer the device from the inputs when not given explicitly.
        if device is None:
            device = get_a_var(input).get_device()
        try:
            with torch.cuda.device(device):
                output = module(*input, **kwargs)
            with lock:
                results[i] = output
        except Exception as e:
            # Store the exception; it is re-raised by the caller below so
            # the failure surfaces in the main thread.
            with lock:
                results[i] = e

    if len(modules) > 1:
        threads = [threading.Thread(target=_worker,
                                    args=(i, module, input, kwargs, results, lock, device),
                                    )
                   for i, (module, input, kwargs, device) in
                   enumerate(zip(modules, inputs, kwargs_tup, devices))]

        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
    else:
        # Single replica: run inline, no threading overhead.
        _worker(0, modules[0], inputs[0], kwargs_tup[0], results, lock, devices[0])

    outputs = []
    for i in range(len(inputs)):
        output = results[i]
        if isinstance(output, Exception):
            raise output
        outputs.append(output)
    return outputs


import torch.cuda.comm as comm


def replicate(network, devices):
    """Create one replica of `network` per device: parameters are broadcast
    with autograd support (Broadcast), buffers are plain broadcast copies,
    and the module tree is shallow-copied so each replica's submodule /
    parameter / buffer references point at its own device's copies.
    """
    from ._functions import Broadcast

    devices = tuple(devices)
    num_replicas = len(devices)

    # Broadcast all parameters at once, then regroup them per replica.
    params = list(network.parameters())
    param_indices = {param: idx for idx, param in enumerate(params)}
    param_copies = Broadcast.apply(devices, *params)
    if len(params) > 0:
        param_copies = [param_copies[i:i + len(params)]
                        for i in range(0, len(param_copies), len(params))]

    buffers = list(network.buffers())
    buffer_indices = {buf: idx for idx, buf in enumerate(buffers)}
    buffer_copies = comm.broadcast_coalesced(buffers, devices)

    modules = list(network.modules())
    module_copies = [[] for device in devices]
    module_indices = {}

    # Shallow-copy every module (bypassing __init__ via __new__) so replicas
    # share code but get their own _parameters/_buffers/_modules dicts.
    for i, module in enumerate(modules):
        module_indices[module] = i
        for j in range(num_replicas):
            replica = module.__new__(type(module))
            replica.__dict__ = module.__dict__.copy()
            replica._parameters = replica._parameters.copy()
            replica._buffers = replica._buffers.copy()
            replica._modules = replica._modules.copy()
            module_copies[j].append(replica)

    # Rewire each replica's child-module, parameter, and buffer references
    # to the replica-local copies (keyed by the originals' indices).
    for i, module in enumerate(modules):
        for key, child in module._modules.items():
            if child is None:
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._modules[key] = None
            else:
                module_idx = module_indices[child]
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._modules[key] = module_copies[j][module_idx]
        for key, param in module._parameters.items():
            if param is None:
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._parameters[key] = None
            else:
                param_idx = param_indices[param]
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._parameters[key] = param_copies[j][param_idx]
        for key, buf in module._buffers.items():
            if buf is None:
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._buffers[key] = None
            else:
                buffer_idx = buffer_indices[buf]
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._buffers[key] = buffer_copies[j][buffer_idx]

    # modules[0] is the root network, so element 0 of each copy list is the
    # fully-rewired replica of the whole network.
    return [module_copies[j][0] for j in range(num_replicas)]
import collections
import collections.abc
import re
import numpy as np
import torch
from torch.autograd import Variable
# Guarded so this module can be imported standalone (e.g. by tests); the
# Scatter/Gather functions are only needed on the Variable (GPU) code paths.
try:
    from ._functions import Scatter, Gather
except ImportError:
    Scatter = Gather = None
# FIX: torch._six was removed in torch >= 1.13; fall back to the stdlib
# equivalents it used to define.
try:
    from torch._six import string_classes, int_classes
except ImportError:
    string_classes, int_classes = (str, bytes), int

# Maps numpy dtype names to the corresponding CPU tensor constructors, used
# by gather() when collecting numpy scalars/arrays.
numpy_type_map = {
    'float64': torch.DoubleTensor,
    'float32': torch.FloatTensor,
    'float16': torch.HalfTensor,
    'int64': torch.LongTensor,
    'int32': torch.IntTensor,
    'int16': torch.ShortTensor,
    'int8': torch.CharTensor,
    'uint8': torch.ByteTensor,
}


def scatter(inputs, target_gpus, dim=0):
    r"""
    Slices variables into approximately equal chunks and
    distributes them across given GPUs. Duplicates
    references to objects that are not variables. Does not
    support Tensors.
    """
    def scatter_map(obj):
        if isinstance(obj, Variable):
            return Scatter.apply(target_gpus, None, dim, obj)
        assert not torch.is_tensor(obj), "Tensors not supported in scatter."
        # Containers are scattered element-wise, preserving their structure.
        if isinstance(obj, tuple) and len(obj) > 0:
            return list(zip(*map(scatter_map, obj)))
        if isinstance(obj, list) and len(obj) > 0:
            return list(map(list, zip(*map(scatter_map, obj))))
        if isinstance(obj, dict) and len(obj) > 0:
            return list(map(type(obj), zip(*map(scatter_map, obj.items()))))
        # Non-variable leaves are replicated by reference, once per GPU.
        return [obj for targets in target_gpus]

    # After scatter_map is called, a scatter_map cell will exist. This cell
    # has a reference to the actual function scatter_map, which has references
    # to a closure that has a reference to the scatter_map cell (because the
    # fn is recursive). To avoid this reference cycle, we set the function to
    # None, clearing the cell
    try:
        return scatter_map(inputs)
    finally:
        scatter_map = None


def scatter_kwargs(inputs, kwargs, target_gpus, dim=0):
    r"""Scatter with support for kwargs dictionary.

    Returns (inputs, kwargs) tuples of equal length, padding the shorter one
    with empty tuples/dicts so every replica gets both.
    """
    inputs = scatter(inputs, target_gpus, dim) if inputs else []
    kwargs = scatter(kwargs, target_gpus, dim) if kwargs else []
    if len(inputs) < len(kwargs):
        inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
    elif len(kwargs) < len(inputs):
        kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
    inputs = tuple(inputs)
    kwargs = tuple(kwargs)
    return inputs, kwargs


def gather(outputs, target_device, dim=0):
    r"""
    Gathers variables from different GPUs on a specified device
    (-1 means the CPU).
    """
    error_msg = "outputs must contain tensors, numbers, dicts or lists; found {}"

    def gather_map(outputs):
        out = outputs[0]
        elem_type = type(out)
        if isinstance(out, Variable):
            return Gather.apply(target_device, dim, *outputs)
        if out is None:
            return None
        # FIX: the abstract base classes were removed from the `collections`
        # namespace in Python 3.10; use collections.abc instead.
        if isinstance(out, collections.abc.Sequence):
            return type(out)(map(gather_map, zip(*outputs)))
        elif isinstance(out, collections.abc.Mapping):
            return {key: gather_map([d[key] for d in outputs]) for key in out}
        elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
                and elem_type.__name__ != 'string_':
            elem = out
            if elem_type.__name__ == 'ndarray':
                # array of string classes and object
                if re.search('[SaUO]', elem.dtype.str) is not None:
                    raise TypeError(error_msg.format(elem.dtype))

                return Variable(torch.from_numpy(np.concatenate(outputs, dim)))
            if elem.shape == ():  # scalars
                py_type = float if elem.dtype.name.startswith('float') else int
                return Variable(numpy_type_map[elem.dtype.name](list(map(py_type, outputs))))
        elif isinstance(out, int_classes):
            return Variable(torch.LongTensor(outputs))
        elif isinstance(out, float):
            return Variable(torch.DoubleTensor(outputs))
        elif isinstance(out, string_classes):
            return outputs

        raise TypeError((error_msg.format(elem_type)))

    # Recursive function calls like this create reference cycles.
    # Setting the function to None clears the refcycle.
    try:
        return gather_map(outputs)
    finally:
        gather_map = None
# octave and aspect fields are only used on RetinaNet. Octave corresponds to
# the scale of the anchor and aspect denotes which aspect ratio is used in
# the range of aspect ratios.
FieldOfAnchors = namedtuple(
    'FieldOfAnchors', [
        'field_of_anchors', 'num_cell_anchors', 'stride', 'field_size',
        'octave', 'aspect'
    ]
)

# Thread-local cache memoizing get_field_of_anchors results.
_threadlocal_foa = threading.local()


def get_field_of_anchors(
    stride, anchor_sizes, anchor_aspect_ratios, octave=None, aspect=None
):
    """Return the full field of anchors for one FPN level: every cell anchor
    shifted to every position of the (field_size x field_size) grid.

    Results are memoized per thread, keyed on (stride, sizes, ratios).
    NOTE(review): octave/aspect are not part of the cache key — presumably
    distinct octaves always come with distinct sizes; verify for RetinaNet.
    """
    global _threadlocal_foa
    cache = getattr(_threadlocal_foa, 'cache', None)
    if cache is None:
        cache = _threadlocal_foa.cache = {}

    key = str(stride) + str(anchor_sizes) + str(anchor_aspect_ratios)
    if key in cache:
        return cache[key]

    # Anchors at a single feature cell.
    cell_anchors = generate_anchors(
        stride=stride, sizes=anchor_sizes, aspect_ratios=anchor_aspect_ratios
    )
    num_cell_anchors = cell_anchors.shape[0]

    # Grid size: the max training image size rounded up to the coarsest FPN
    # stride, then divided by this level's stride.
    fpn_max_size = cfg.FPN.COARSEST_STRIDE * np.ceil(
        cfg.TRAIN.MAX_SIZE / float(cfg.FPN.COARSEST_STRIDE)
    )
    field_size = int(np.ceil(fpn_max_size / float(stride)))

    # Enumerate all (x, y) shifts on the grid, one row per position, as
    # (x, y, x, y) so they can be added to (x1, y1, x2, y2) boxes.
    coords = np.arange(field_size) * stride
    shift_x, shift_y = np.meshgrid(coords, coords)
    sx = shift_x.ravel()
    sy = shift_y.ravel()
    shifts = np.vstack((sx, sy, sx, sy)).transpose()

    # Broadcast the A cell anchors (1, A, 4) against the K shifts (K, 1, 4)
    # to get all K*A shifted anchors, then flatten to (K*A, 4).
    num_anchors = num_cell_anchors
    num_shifts = shifts.shape[0]
    boxes = (
        cell_anchors.reshape((1, num_anchors, 4)) +
        shifts.reshape((1, num_shifts, 4)).transpose((1, 0, 2))
    )
    boxes = boxes.reshape((num_shifts * num_anchors, 4))

    foa = FieldOfAnchors(
        field_of_anchors=boxes.astype(np.float32),
        num_cell_anchors=num_cell_anchors,
        stride=stride,
        field_size=field_size,
        octave=octave,
        aspect=aspect
    )
    cache[key] = foa
    return foa


def unmap(data, count, inds, fill=0):
    """Unmap a subset of item (data) back to the original set of items (of
    size count): positions listed in inds receive data, all others receive
    fill. Returns data unchanged when inds already covers everything.
    """
    if count == len(inds):
        return data

    if data.ndim == 1:
        ret = np.full((count, ), fill, dtype=data.dtype)
        ret[inds] = data
    else:
        ret = np.full((count, ) + data.shape[1:], fill, dtype=data.dtype)
        ret[inds, :] = data
    return ret


def compute_targets(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    """Compute bounding-box regression targets for an image."""
    deltas = box_utils.bbox_transform_inv(ex_rois, gt_rois, weights)
    return deltas.astype(np.float32, copy=False)
def add_keypoint_rcnn_blobs(blobs, roidb, fg_rois_per_image, fg_inds, im_scale,
                            batch_idx):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    gt_keypoints = roidb['gt_keypoints']

    # For every proposal, look up the keypoints of its assigned gt box and
    # check whether at least one visible keypoint falls inside the proposal.
    kp_gt_inds = gt_inds[roidb['box_to_gt_ind_map']]
    inside = _within_box(gt_keypoints[kp_gt_inds, :, :], roidb['boxes'])
    visible = gt_keypoints[kp_gt_inds, 2, :] > 0
    has_visible_kp = np.sum(np.logical_and(visible, inside), axis=1) > 0
    kp_fg_inds = np.where(
        np.logical_and(roidb['max_overlaps'] >= cfg.TRAIN.FG_THRESH,
                       has_visible_kp))[0]

    # Subsample down to the per-image foreground roi budget.
    kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size)
    if kp_fg_inds.size > kp_fg_rois_per_this_image:
        kp_fg_inds = np.random.choice(
            kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False)

    sampled_fg_rois = roidb['boxes'][kp_fg_inds]
    box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

    # Gather the gt keypoints for each sampled roi; -1 marks "no keypoint".
    num_keypoints = gt_keypoints.shape[2]
    sampled_keypoints = -np.ones(
        (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),
        dtype=gt_keypoints.dtype)
    for row, gt_ind in enumerate(box_to_gt_ind_map):
        if gt_ind >= 0:
            sampled_keypoints[row, :, :] = gt_keypoints[gt_inds[gt_ind], :, :]
            assert np.sum(sampled_keypoints[row, 2, :]) > 0

    heats, weights = keypoint_utils.keypoints_to_heatmap_labels(
        sampled_keypoints, sampled_fg_rois)

    flat_shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS,)
    heats = heats.reshape(flat_shape)
    weights = weights.reshape(flat_shape)

    # Rois enter the network at input-image scale, prefixed with batch index:
    # (batch_idx, x1, y1, x2, y2).
    sampled_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_fg_rois.shape[0], 1))
    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))

    blobs['keypoint_rois'] = sampled_fg_rois
    blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False)
    blobs['keypoint_weights'] = weights


def finalize_keypoint_minibatch(blobs, valid):
    """Finalize the minibatch after blobs for all minibatch images have been
    collated.
    """
    num_visible_keypoints = np.sum(blobs['keypoint_weights'])
    # A minibatch is only valid if it carries more than the configured minimum
    # number of visible keypoints.
    valid = (valid and len(blobs['keypoint_weights']) > 0
             and num_visible_keypoints >
             cfg.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH)
    # Normalizer to use if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False.
    # See modeling.model_builder.add_keypoint_losses
    expected_kps = (cfg.TRAIN.IMS_PER_BATCH * cfg.TRAIN.BATCH_SIZE_PER_IM *
                    cfg.TRAIN.FG_FRACTION * cfg.KRCNN.NUM_KEYPOINTS)
    blobs['keypoint_loss_normalizer'] = np.array(
        num_visible_keypoints / expected_kps, dtype=np.float32)
    return valid


def _within_box(points, boxes):
    """Validate which keypoints are contained inside a given box.

    points: Nx2xK
    boxes: Nx4
    output: NxK
    """
    # Compare each keypoint coordinate against its own box edges; broadcasting
    # expands the per-box edges over the K keypoints.
    x1 = boxes[:, 0, np.newaxis]
    y1 = boxes[:, 1, np.newaxis]
    x2 = boxes[:, 2, np.newaxis]
    y2 = boxes[:, 3, np.newaxis]
    px = points[:, 0, :]
    py = points[:, 1, :]
    return (px >= x1) & (px <= x2) & (py >= y1) & (py <= y2)
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Associate one gt polygon mask with each training roi that has a fg
    # (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) &
                             (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Pair each fg roi with the polygon whose enclosing box overlaps it
        # the most (measured by bbox overlap).
        rois_fg = sampled_boxes[fg_inds]
        overlaps = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_inds = np.argmax(overlaps, axis=1)

        for i, roi_fg in enumerate(rois_fg):
            # Rasterize the portion of the matched polygon lying inside this
            # fg roi to an M x M binary image.
            poly_gt = polys_gt[fg_polys_inds[i]]
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so feed a single bg roi with
        # an all -1 (ignore label) mask, labeled as class 0 (background).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        masks = -blob_utils.ones((1, M**2), int32=True)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg to input-image coordinates and format as
    # (batch_idx, x1, y1, x2, y2).
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks


def _expand_to_class_specific_mask_targets(masks, mask_class_labels):
    """Expand masks from shape (#masks, M ** 2) to (#masks, #classes * M ** 2)
    to encode class specific mask targets. Each mask is written into the slot
    of its class; every other entry stays -1 ("don't care" / ignore label).
    """
    assert masks.shape[0] == mask_class_labels.shape[0]
    M = cfg.MRCNN.RESOLUTION

    mask_targets = -blob_utils.ones(
        (masks.shape[0], cfg.MODEL.NUM_CLASSES * M**2), int32=True)

    for i, label in enumerate(mask_class_labels):
        cls = int(label)
        # A background (cls == 0) instance only happens when an image had no
        # fg samples; its target stays all -1 and is ignored by the loss.
        if cls > 0:
            start = M**2 * cls
            mask_targets[i, start:start + M**2] = masks[i, :]

    return mask_targets
def get_minibatch_blob_names(is_training=True):
    """Return blob names in the order in which they are read by the data
    loader.
    """
    # 'data' holds a batch of N images, each with 3 channels.
    blob_names = ['data']
    if cfg.RPN.RPN_ON:
        # RPN-only or end-to-end Faster R-CNN
        blob_names += roi_data.rpn.get_rpn_blob_names(is_training=is_training)
    elif cfg.RETINANET.RETINANET_ON:
        raise NotImplementedError
    else:
        # Fast R-CNN like models trained on precomputed proposals.
        # NOTE(review): only roi_data.rpn is imported in this module; this
        # branch relies on roi_data.fast_rcnn being imported somewhere else
        # (e.g. roi_data/__init__) — confirm.
        blob_names += roi_data.fast_rcnn.get_fast_rcnn_blob_names(
            is_training=is_training
        )
    return blob_names


def get_minibatch(roidb):
    """Given a roidb, construct a minibatch sampled from it."""
    # Blobs are collected per image into lists and concatenated into single
    # tensors later, so every entry starts out as an empty list.
    blobs = {name: [] for name in get_minibatch_blob_names()}

    # Get the input image blob
    im_blob, im_scales = _get_image_blob(roidb)
    blobs['data'] = im_blob
    if cfg.RPN.RPN_ON:
        # RPN-only or end-to-end Faster/Mask R-CNN
        valid = roi_data.rpn.add_rpn_blobs(blobs, im_scales, roidb)
    elif cfg.RETINANET.RETINANET_ON:
        raise NotImplementedError
    else:
        # Fast R-CNN like models trained on precomputed proposals
        valid = roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)
    return blobs, valid


def _get_image_blob(roidb):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    # Sample one random target scale per image in this batch.
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=num_images)
    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        im = cv2.imread(entry['image'])
        assert im is not None, \
            'Failed to read image \'{}\''.format(entry['image'])
        # If NOT using opencv to read in images, grayscale images must be
        # expanded to 3 channels and RGB flipped to BGR here.
        if entry['flipped']:
            # Horizontal flip to match the flipped ground-truth annotations.
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = blob_utils.prep_im_for_blob(
            im, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale[0])
        processed_ims.append(im[0])

    # Create a blob to hold the input images [n, c, h, w]
    blob = blob_utils.im_list_to_blob(processed_ims)

    return blob, im_scales
19 | try: 20 | numpy_include = np.get_include() 21 | except AttributeError: 22 | numpy_include = np.get_numpy_include() 23 | 24 | 25 | ext_modules = [ 26 | Extension( 27 | name='utils.cython_bbox', 28 | sources=['utils/cython_bbox.pyx'], 29 | extra_compile_args=['-Wno-cpp'], 30 | include_dirs=[numpy_include] 31 | ), 32 | Extension( 33 | name='utils.cython_nms', 34 | sources=['utils/cython_nms.pyx'], 35 | extra_compile_args=['-Wno-cpp'], 36 | include_dirs=[numpy_include] 37 | ) 38 | ] 39 | 40 | setup( 41 | name='mask_rcnn', 42 | ext_modules=cythonize(ext_modules) 43 | ) 44 | 45 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adityaarun1/Detectron.pytorch/c780eb3d22808911978b317fe97cf544c8c47d8b/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def get_image_blob(im, target_scale, target_max_size):
    """Convert an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale (float): image scale (target size) / (original size)
        im_info (ndarray): float32 row (height, width, im_scale)
    """
    processed_im, im_scale = prep_im_for_blob(
        im, cfg.PIXEL_MEANS, [target_scale], target_max_size
    )
    blob = im_list_to_blob(processed_im)
    # NOTE: this height and width may be larger than actual scaled input image
    # due to the FPN.COARSEST_STRIDE related padding in im_list_to_blob. We are
    # maintaining this behavior for now to make existing results exactly
    # reproducible (in practice using the true input image height and width
    # yields nearly the same results, but they are sometimes slightly different
    # because predictions near the edge of the image will be pruned more
    # aggressively).
    height, width = blob.shape[2], blob.shape[3]
    im_info = np.hstack((height, width, im_scale))[np.newaxis, :]
    return blob, im_scale, im_info.astype(np.float32)


def im_list_to_blob(ims):
    """Convert a list of images into a network input.

    Assumes images were prepared using prep_im_for_blob or equivalent: i.e.
      - BGR channel order
      - pixel means subtracted
      - resized to the desired input size
      - float32 numpy ndarray format
    Output is a 4D NCHW tensor of the images concatenated along axis 0.
    """
    if not isinstance(ims, list):
        ims = [ims]
    max_shape = get_max_shape([im.shape[:2] for im in ims])

    num_images = len(ims)
    # Zero-pad every image up to the common (max) spatial shape.
    blob = np.zeros(
        (num_images, max_shape[0], max_shape[1], 3), dtype=np.float32)
    for i in range(num_images):
        im = ims[i]
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
    # Move channels (axis 3) to axis 1
    # Axis order will become: (batch elem, channel, height, width)
    channel_swap = (0, 3, 1, 2)
    blob = blob.transpose(channel_swap)
    return blob


def get_max_shape(im_shapes):
    """Calculate max spatial size (h, w) for batching given a list of image
    shapes.
    """
    max_shape = np.array(im_shapes).max(axis=0)
    assert max_shape.size == 2
    # Pad the image so its sides are divisible by the coarsest FPN stride.
    if cfg.FPN.FPN_ON:
        stride = float(cfg.FPN.COARSEST_STRIDE)
        max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride)
        max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride)
    return max_shape


def prep_im_for_blob(im, pixel_means, target_sizes, max_size):
    """Prepare an image for use as a network input blob. Specially:
      - Subtract per-channel pixel mean
      - Convert to float32
      - Rescale to each of the specified target size (capped at max_size)
    Returns a list of transformed images, one for each target size. Also
    returns the scale factors that were used to compute each returned image.
    """
    im = im.astype(np.float32, copy=False)
    im -= pixel_means
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    ims = []
    im_scales = []
    for target_size in target_sizes:
        im_scale = get_target_scale(
            im_size_min, im_size_max, target_size, max_size)
        im_resized = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                                interpolation=cv2.INTER_LINEAR)
        ims.append(im_resized)
        im_scales.append(im_scale)
    return ims, im_scales


def get_im_blob_sizes(im_shape, target_sizes, max_size):
    """Calculate im blob size for multiple target_sizes given original im
    shape.
    """
    im_size_min = np.min(im_shape)
    im_size_max = np.max(im_shape)
    im_sizes = []
    for target_size in target_sizes:
        im_scale = get_target_scale(
            im_size_min, im_size_max, target_size, max_size)
        im_sizes.append(np.round(im_shape * im_scale))
    return np.array(im_sizes)


def get_target_scale(im_size_min, im_size_max, target_size, max_size):
    """Calculate target resize scale: scale the short side to target_size,
    unless that would push the long side past max_size.
    """
    im_scale = float(target_size) / float(im_size_min)
    # Prevent the biggest axis from being more than max_size
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    return im_scale


def zeros(shape, int32=False):
    """Return a blob of all zeros of the given shape with the correct float or
    int data type.
    """
    return np.zeros(shape, dtype=np.int32 if int32 else np.float32)


def ones(shape, int32=False):
    """Return a blob of all ones of the given shape with the correct float or
    int data type.
    """
    return np.ones(shape, dtype=np.int32 if int32 else np.float32)


def serialize(obj):
    """Serialize a Python object using pickle and encode it as an array of
    float32 values so that it can be feed into the workspace. See
    deserialize().
    """
    # FIX: np.fromstring on bytes is deprecated (and dropped in newer numpy);
    # np.frombuffer is the supported equivalent and produces the same values.
    return np.frombuffer(pickle.dumps(obj), dtype=np.uint8).astype(np.float32)


def deserialize(arr):
    """Unserialize a Python object from an array of float32 values fetched
    from a workspace. See serialize().
    """
    return pickle.loads(arr.astype(np.uint8).tobytes())
14 | ############################################################################## 15 | 16 | """A simple attribute dictionary used for representing configuration options.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | 24 | class AttrDict(dict): 25 | 26 | IMMUTABLE = '__immutable__' 27 | 28 | def __init__(self, *args, **kwargs): 29 | super(AttrDict, self).__init__(*args, **kwargs) 30 | self.__dict__[AttrDict.IMMUTABLE] = False 31 | 32 | def __getattr__(self, name): 33 | if name in self.__dict__: 34 | return self.__dict__[name] 35 | elif name in self: 36 | return self[name] 37 | else: 38 | raise AttributeError(name) 39 | 40 | def __setattr__(self, name, value): 41 | if not self.__dict__[AttrDict.IMMUTABLE]: 42 | if name in self.__dict__: 43 | self.__dict__[name] = value 44 | else: 45 | self[name] = value 46 | else: 47 | raise AttributeError( 48 | 'Attempted to set "{}" to "{}", but AttrDict is immutable'. 49 | format(name, value) 50 | ) 51 | 52 | def immutable(self, is_immutable): 53 | """Set immutability to is_immutable and recursively apply the setting 54 | to all nested AttrDicts. 55 | """ 56 | self.__dict__[AttrDict.IMMUTABLE] = is_immutable 57 | # Recursively set immutable state 58 | for v in self.__dict__.values(): 59 | if isinstance(v, AttrDict): 60 | v.immutable(is_immutable) 61 | for v in self.values(): 62 | if isinstance(v, AttrDict): 63 | v.immutable(is_immutable) 64 | 65 | def is_immutable(self): 66 | return self.__dict__[AttrDict.IMMUTABLE] 67 | -------------------------------------------------------------------------------- /lib/utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 
def colormap(rgb=False):
    """Return a (79, 3) float32 array of distinct colors scaled to [0, 255].

    Colors come out in RGB channel order when `rgb` is True, otherwise in
    OpenCV's BGR order.
    """
    palette = [
        [0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125],
        [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933],
        [0.635, 0.078, 0.184], [0.300, 0.300, 0.300], [0.600, 0.600, 0.600],
        [1.000, 0.000, 0.000], [1.000, 0.500, 0.000], [0.749, 0.749, 0.000],
        [0.000, 1.000, 0.000], [0.000, 0.000, 1.000], [0.667, 0.000, 1.000],
        [0.333, 0.333, 0.000], [0.333, 0.667, 0.000], [0.333, 1.000, 0.000],
        [0.667, 0.333, 0.000], [0.667, 0.667, 0.000], [0.667, 1.000, 0.000],
        [1.000, 0.333, 0.000], [1.000, 0.667, 0.000], [1.000, 1.000, 0.000],
        [0.000, 0.333, 0.500], [0.000, 0.667, 0.500], [0.000, 1.000, 0.500],
        [0.333, 0.000, 0.500], [0.333, 0.333, 0.500], [0.333, 0.667, 0.500],
        [0.333, 1.000, 0.500], [0.667, 0.000, 0.500], [0.667, 0.333, 0.500],
        [0.667, 0.667, 0.500], [0.667, 1.000, 0.500], [1.000, 0.000, 0.500],
        [1.000, 0.333, 0.500], [1.000, 0.667, 0.500], [1.000, 1.000, 0.500],
        [0.000, 0.333, 1.000], [0.000, 0.667, 1.000], [0.000, 1.000, 1.000],
        [0.333, 0.000, 1.000], [0.333, 0.333, 1.000], [0.333, 0.667, 1.000],
        [0.333, 1.000, 1.000], [0.667, 0.000, 1.000], [0.667, 0.333, 1.000],
        [0.667, 0.667, 1.000], [0.667, 1.000, 1.000], [1.000, 0.000, 1.000],
        [1.000, 0.333, 1.000], [1.000, 0.667, 1.000], [0.167, 0.000, 0.000],
        [0.333, 0.000, 0.000], [0.500, 0.000, 0.000], [0.667, 0.000, 0.000],
        [0.833, 0.000, 0.000], [1.000, 0.000, 0.000], [0.000, 0.167, 0.000],
        [0.000, 0.333, 0.000], [0.000, 0.500, 0.000], [0.000, 0.667, 0.000],
        [0.000, 0.833, 0.000], [0.000, 1.000, 0.000], [0.000, 0.000, 0.167],
        [0.000, 0.000, 0.333], [0.000, 0.000, 0.500], [0.000, 0.000, 0.667],
        [0.000, 0.000, 0.833], [0.000, 0.000, 1.000], [0.000, 0.000, 0.000],
        [0.143, 0.143, 0.143], [0.286, 0.286, 0.286], [0.429, 0.429, 0.429],
        [0.571, 0.571, 0.571], [0.714, 0.714, 0.714], [0.857, 0.857, 0.857],
        [1.000, 1.000, 1.000],
    ]
    color_list = np.array(palette, dtype=np.float32) * 255
    if not rgb:
        # Reverse channels for OpenCV's BGR convention.
        color_list = color_list[:, ::-1]
    return color_list
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_overlaps( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ua = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) + 70 | box_area - iw * ih 71 | ) 72 | overlaps[n, k] = iw * ih / ua 73 | return overlaps 74 | -------------------------------------------------------------------------------- 
/lib/utils/detectron_weight_helper.py: -------------------------------------------------------------------------------- 1 | """Helper functions for loading pretrained weights from Detectron pickle files 2 | """ 3 | 4 | import pickle 5 | import re 6 | import torch 7 | 8 | 9 | def load_detectron_weight(net, detectron_weight_file): 10 | name_mapping, orphan_in_detectron = net.detectron_weight_mapping 11 | 12 | with open(detectron_weight_file, 'rb') as fp: 13 | src_blobs = pickle.load(fp, encoding='latin1') 14 | if 'blobs' in src_blobs: 15 | src_blobs = src_blobs['blobs'] 16 | 17 | params = net.state_dict() 18 | for p_name, p_tensor in params.items(): 19 | d_name = name_mapping[p_name] 20 | if isinstance(d_name, str): # maybe str, None or True 21 | p_tensor.copy_(torch.Tensor(src_blobs[d_name])) 22 | 23 | 24 | def resnet_weights_name_pattern(): 25 | pattern = re.compile(r"conv1_w|conv1_gn_[sb]|res_conv1_.+|res\d+_\d+_.+") 26 | return pattern 27 | 28 | 29 | if __name__ == '__main__': 30 | """Testing""" 31 | from pprint import pprint 32 | import sys 33 | sys.path.insert(0, '..') 34 | from modeling.model_builder import Generalized_RCNN 35 | from core.config import cfg, cfg_from_file 36 | 37 | cfg.MODEL.NUM_CLASSES = 81 38 | cfg_from_file('../../cfgs/res50_mask.yml') 39 | net = Generalized_RCNN() 40 | 41 | # pprint(list(net.state_dict().keys()), width=1) 42 | 43 | mapping, orphans = net.detectron_weight_mapping 44 | state_dict = net.state_dict() 45 | 46 | for k in mapping.keys(): 47 | assert k in state_dict, '%s' % k 48 | 49 | rest = set(state_dict.keys()) - set(mapping.keys()) 50 | assert len(rest) == 0 51 | -------------------------------------------------------------------------------- /lib/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Environment helper functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import os
import sys

# Default value of the CMake install prefix
_CMAKE_INSTALL_PREFIX = '/usr/local'


def get_runtime_dir():
    """Retrieve the path to the runtime directory."""
    # The runtime directory is simply the current working directory of the
    # process that launched the tool.
    return os.getcwd()


def get_py_bin_ext():
    """Retrieve python binary extension."""
    return '.py'


def set_up_matplotlib():
    """Set matplotlib up."""
    # Imported lazily so importing this module never requires matplotlib.
    import matplotlib
    # Use a non-interactive backend
    matplotlib.use('Agg')


def exit_on_error():
    """Exit from a detectron tool when there's an error."""
    sys.exit(1)
--------------------------------------------------------------------------------
/lib/utils/fpn.py:
--------------------------------------------------------------------------------
import numpy as np

import utils.boxes as box_utils
from core.config import cfg


# ---------------------------------------------------------------------------- #
# Helper functions for working with multilevel FPN RoIs
#
---------------------------------------------------------------------------- # 10 | 11 | def map_rois_to_fpn_levels(rois, k_min, k_max): 12 | """Determine which FPN level each RoI in a set of RoIs should map to based 13 | on the heuristic in the FPN paper. 14 | """ 15 | # Compute level ids 16 | areas, neg_idx = box_utils.boxes_area(rois) 17 | areas[neg_idx] = 0 # np.sqrt will remove the entries with negative value 18 | s = np.sqrt(areas) 19 | s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224 20 | lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4 21 | 22 | # Eqn.(1) in FPN paper 23 | target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6)) 24 | target_lvls = np.clip(target_lvls, k_min, k_max) 25 | 26 | # Mark to discard negative area roi. See utils.fpn.add_multilevel_roi_blobs 27 | # target_lvls[neg_idx] = -1 28 | return target_lvls 29 | 30 | 31 | def add_multilevel_roi_blobs( 32 | blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max 33 | ): 34 | """Add RoI blobs for multiple FPN levels to the blobs dict. 35 | 36 | blobs: a dict mapping from blob name to numpy ndarray 37 | blob_prefix: name prefix to use for the FPN blobs 38 | rois: the source rois as a 2D numpy array of shape (N, 5) where each row is 39 | an roi and the columns encode (batch_idx, x1, y1, x2, y2) 40 | target_lvls: numpy array of shape (N, ) indicating which FPN level each roi 41 | in rois should be assigned to. -1 means correspoind roi should be discarded. 
42 | lvl_min: the finest (highest resolution) FPN level (e.g., 2) 43 | lvl_max: the coarest (lowest resolution) FPN level (e.g., 6) 44 | """ 45 | rois_idx_order = np.empty((0, )) 46 | rois_stacked = np.zeros((0, 5), dtype=np.float32) # for assert 47 | # target_lvls = remove_negative_area_roi_blobs(blobs, blob_prefix, rois, target_lvls) 48 | for lvl in range(lvl_min, lvl_max + 1): 49 | idx_lvl = np.where(target_lvls == lvl)[0] 50 | blobs[blob_prefix + '_fpn' + str(lvl)] = rois[idx_lvl, :] 51 | rois_idx_order = np.concatenate((rois_idx_order, idx_lvl)) 52 | rois_stacked = np.vstack( 53 | [rois_stacked, blobs[blob_prefix + '_fpn' + str(lvl)]] 54 | ) 55 | rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, copy=False) 56 | blobs[blob_prefix + '_idx_restore_int32'] = rois_idx_restore 57 | # Sanity check that restore order is correct 58 | assert (rois_stacked[rois_idx_restore] == rois).all() 59 | 60 | 61 | def remove_negative_area_roi_blobs(blobs, blob_prefix, rois, target_lvls): 62 | """ Delete roi entries that have negative area (Uncompleted) """ 63 | idx_neg = np.where(target_lvls == -1)[0] 64 | rois = np.delete(rois, idx_neg, axis=0) 65 | blobs[blob_prefix] = rois 66 | target_lvls = np.delete(target_lvls, idx_neg, axis=0) 67 | #TODO: other blobs in faster_rcnn.get_fast_rcnn_blob_names should also be modified 68 | return target_lvls 69 | -------------------------------------------------------------------------------- /lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Image helper functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cv2
import numpy as np


def aspect_ratio_rel(im, aspect_ratio):
    """Performs width-relative aspect ratio transformation.

    Scales only the width by `aspect_ratio`; the height is kept unchanged.
    """
    im_h, im_w = im.shape[:2]
    im_ar_w = int(round(aspect_ratio * im_w))
    # cv2.resize takes dsize as (width, height)
    im_ar = cv2.resize(im, dsize=(im_ar_w, im_h))
    return im_ar


def aspect_ratio_abs(im, aspect_ratio):
    """Performs absolute aspect ratio transformation.

    Resizes the image so that its w/h equals `aspect_ratio` while the total
    pixel area is (approximately) preserved.
    """
    im_h, im_w = im.shape[:2]
    im_area = im_h * im_w

    # Solve w * h = area and w / h = aspect_ratio for the new dimensions.
    im_ar_w = np.sqrt(im_area * aspect_ratio)
    im_ar_h = np.sqrt(im_area / aspect_ratio)
    assert np.isclose(im_ar_w / im_ar_h, aspect_ratio)

    im_ar = cv2.resize(im, dsize=(int(im_ar_w), int(im_ar_h)))
    return im_ar
--------------------------------------------------------------------------------
/lib/utils/io.py:
--------------------------------------------------------------------------------
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """IO utilities.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | from six.moves import cPickle as pickle 24 | import hashlib 25 | import logging 26 | import os 27 | import re 28 | import sys 29 | try: 30 | from urllib.request import urlopen 31 | except ImportError: #python2 32 | from urllib2 import urlopen 33 | 34 | logger = logging.getLogger(__name__) 35 | 36 | _DETECTRON_S3_BASE_URL = 'https://s3-us-west-2.amazonaws.com/detectron' 37 | 38 | 39 | def save_object(obj, file_name): 40 | """Save a Python object by pickling it.""" 41 | file_name = os.path.abspath(file_name) 42 | with open(file_name, 'wb') as f: 43 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 44 | 45 | 46 | def cache_url(url_or_file, cache_dir): 47 | """Download the file specified by the URL to the cache_dir and return the 48 | path to the cached file. If the argument is not a URL, simply return it as 49 | is. 
50 | """ 51 | is_url = re.match(r'^(?:http)s?://', url_or_file, re.IGNORECASE) is not None 52 | 53 | if not is_url: 54 | return url_or_file 55 | 56 | url = url_or_file 57 | assert url.startswith(_DETECTRON_S3_BASE_URL), \ 58 | ('Detectron only automatically caches URLs in the Detectron S3 ' 59 | 'bucket: {}').format(_DETECTRON_S3_BASE_URL) 60 | 61 | cache_file_path = url.replace(_DETECTRON_S3_BASE_URL, cache_dir) 62 | if os.path.exists(cache_file_path): 63 | assert_cache_file_is_ok(url, cache_file_path) 64 | return cache_file_path 65 | 66 | cache_file_dir = os.path.dirname(cache_file_path) 67 | if not os.path.exists(cache_file_dir): 68 | os.makedirs(cache_file_dir) 69 | 70 | logger.info('Downloading remote file {} to {}'.format(url, cache_file_path)) 71 | download_url(url, cache_file_path) 72 | assert_cache_file_is_ok(url, cache_file_path) 73 | return cache_file_path 74 | 75 | 76 | def assert_cache_file_is_ok(url, file_path): 77 | """Check that cache file has the correct hash.""" 78 | # File is already in the cache, verify that the md5sum matches and 79 | # return local path 80 | cache_file_md5sum = _get_file_md5sum(file_path) 81 | ref_md5sum = _get_reference_md5sum(url) 82 | assert cache_file_md5sum == ref_md5sum, \ 83 | ('Target URL {} appears to be downloaded to the local cache file ' 84 | '{}, but the md5 hash of the local file does not match the ' 85 | 'reference (actual: {} vs. expected: {}). You may wish to delete ' 86 | 'the cached file and try again to trigger automatic ' 87 | 'download.').format(url, file_path, cache_file_md5sum, ref_md5sum) 88 | 89 | 90 | def _progress_bar(count, total): 91 | """Report download progress. 
92 | Credit: 93 | https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113 94 | """ 95 | bar_len = 60 96 | filled_len = int(round(bar_len * count / float(total))) 97 | 98 | percents = round(100.0 * count / float(total), 1) 99 | bar = '=' * filled_len + '-' * (bar_len - filled_len) 100 | 101 | sys.stdout.write( 102 | ' [{}] {}% of {:.1f}MB file \r'. 103 | format(bar, percents, total / 1024 / 1024) 104 | ) 105 | sys.stdout.flush() 106 | if count >= total: 107 | sys.stdout.write('\n') 108 | 109 | 110 | def download_url( 111 | url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar 112 | ): 113 | """Download url and write it to dst_file_path. 114 | Credit: 115 | https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook 116 | """ 117 | response = urlopen(url) 118 | total_size = response.info().getheader('Content-Length').strip() 119 | total_size = int(total_size) 120 | bytes_so_far = 0 121 | 122 | with open(dst_file_path, 'wb') as f: 123 | while 1: 124 | chunk = response.read(chunk_size) 125 | bytes_so_far += len(chunk) 126 | if not chunk: 127 | break 128 | if progress_hook: 129 | progress_hook(bytes_so_far, total_size) 130 | f.write(chunk) 131 | 132 | return bytes_so_far 133 | 134 | 135 | def _get_file_md5sum(file_name): 136 | """Compute the md5 hash of a file.""" 137 | hash_obj = hashlib.md5() 138 | with open(file_name, 'r') as f: 139 | hash_obj.update(f.read()) 140 | return hash_obj.hexdigest() 141 | 142 | 143 | def _get_reference_md5sum(url): 144 | """By convention the md5 hash for url is stored in url + '.md5sum'.""" 145 | url_md5sum = url + '.md5sum' 146 | md5sum = urlopen(url_md5sum).read().strip() 147 | return md5sum 148 | -------------------------------------------------------------------------------- /lib/utils/logging.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Utilities for logging.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | from __future__ import unicode_literals 21 | 22 | from collections import deque 23 | from email.mime.text import MIMEText 24 | import json 25 | import logging 26 | import numpy as np 27 | import smtplib 28 | import sys 29 | 30 | from core.config import cfg 31 | 32 | # Print lower precision floating point values than default FLOAT_REPR 33 | # Note! 
Has no use for json encode with C speedups 34 | json.encoder.FLOAT_REPR = lambda o: format(o, '.6f') 35 | 36 | 37 | def log_json_stats(stats, sort_keys=True): 38 | print('json_stats: {:s}'.format(json.dumps(stats, sort_keys=sort_keys))) 39 | 40 | 41 | def log_stats(stats, misc_args): 42 | """Log training statistics to terminal""" 43 | if hasattr(misc_args, 'epoch'): 44 | lines = "[%s][%s][Epoch %d][Iter %d / %d]\n" % ( 45 | misc_args.run_name, misc_args.cfg_filename, 46 | misc_args.epoch, misc_args.step, misc_args.iters_per_epoch) 47 | else: 48 | lines = "[%s][%s][Step %d / %d]\n" % ( 49 | misc_args.run_name, misc_args.cfg_filename, stats['iter'], cfg.SOLVER.MAX_ITER) 50 | 51 | lines += "\t\tloss: %.6f, lr: %.6f time: %.6f, eta: %s\n" % ( 52 | stats['loss'], stats['lr'], stats['time'], stats['eta'] 53 | ) 54 | if stats['metrics']: 55 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['metrics'].items()) + "\n" 56 | if stats['head_losses']: 57 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['head_losses'].items()) + "\n" 58 | if cfg.RPN.RPN_ON: 59 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['rpn_losses'].items()) + "\n" 60 | if cfg.FPN.FPN_ON: 61 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['rpn_fpn_cls_losses'].items()) + "\n" 62 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['rpn_fpn_bbox_losses'].items()) + "\n" 63 | print(lines[:-1]) # remove last new line 64 | 65 | 66 | class SmoothedValue(object): 67 | """Track a series of values and provide access to smoothed values over a 68 | window or the global series average. 
69 | """ 70 | 71 | def __init__(self, window_size): 72 | self.deque = deque(maxlen=window_size) 73 | self.series = [] 74 | self.total = 0.0 75 | self.count = 0 76 | 77 | def AddValue(self, value): 78 | self.deque.append(value) 79 | self.series.append(value) 80 | self.count += 1 81 | self.total += value 82 | 83 | def GetMedianValue(self): 84 | return np.median(self.deque) 85 | 86 | def GetAverageValue(self): 87 | return np.mean(self.deque) 88 | 89 | def GetGlobalAverageValue(self): 90 | return self.total / self.count 91 | 92 | 93 | def send_email(subject, body, to): 94 | s = smtplib.SMTP('localhost') 95 | mime = MIMEText(body) 96 | mime['Subject'] = subject 97 | mime['To'] = to 98 | s.sendmail('detectron', to, mime.as_string()) 99 | 100 | 101 | def setup_logging(name): 102 | FORMAT = '%(levelname)s %(filename)s:%(lineno)4d: %(message)s' 103 | # Manually clear root loggers to prevent any module that may have called 104 | # logging.basicConfig() from blocking our logging setup 105 | logging.root.handlers = [] 106 | logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout) 107 | logger = logging.getLogger(name) 108 | return logger 109 | -------------------------------------------------------------------------------- /lib/utils/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import socket 3 | from collections import defaultdict, Iterable 4 | from copy import deepcopy 5 | from datetime import datetime 6 | from itertools import chain 7 | 8 | import torch 9 | 10 | from core.config import cfg 11 | 12 | 13 | def get_run_name(): 14 | """ A unique name for each run """ 15 | return datetime.now().strftime( 16 | '%b%d-%H-%M-%S') + '_' + socket.gethostname() 17 | 18 | 19 | def get_output_dir(args, run_name): 20 | """ Get root output directory for each run """ 21 | cfg_filename, _ = os.path.splitext(os.path.split(args.cfg_file)[1]) 22 | return os.path.join(cfg.OUTPUT_DIR, cfg_filename, run_name) 23 | 24 | 
25 | IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm'] 26 | 27 | 28 | def is_image_file(filename): 29 | """Checks if a file is an image. 30 | Args: 31 | filename (string): path to a file 32 | Returns: 33 | bool: True if the filename ends with a known image extension 34 | """ 35 | filename_lower = filename.lower() 36 | return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS) 37 | 38 | 39 | def get_imagelist_from_dir(dirpath): 40 | images = [] 41 | for f in os.listdir(dirpath): 42 | if is_image_file(f): 43 | images.append(os.path.join(dirpath, f)) 44 | return images 45 | 46 | 47 | def ensure_optimizer_ckpt_params_order(param_groups_names, checkpoint): 48 | """Reorder the parameter ids in the SGD optimizer checkpoint to match 49 | the current order in the program, in case parameter insertion order is changed. 50 | """ 51 | assert len(param_groups_names) == len(checkpoint['optimizer']['param_groups']) 52 | param_lens = (len(g) for g in param_groups_names) 53 | saved_lens = (len(g['params']) for g in checkpoint['optimizer']['param_groups']) 54 | if any(p_len != s_len for p_len, s_len in zip(param_lens, saved_lens)): 55 | raise ValueError("loaded state dict contains a parameter group " 56 | "that doesn't match the size of optimizer's group") 57 | 58 | name_to_curpos = {} 59 | for i, p_names in enumerate(param_groups_names): 60 | for j, name in enumerate(p_names): 61 | name_to_curpos[name] = (i, j) 62 | 63 | param_groups_inds = [[] for _ in range(len(param_groups_names))] 64 | cnts = [0] * len(param_groups_names) 65 | for key in checkpoint['model']: 66 | pos = name_to_curpos.get(key) 67 | if pos: 68 | # print(key, pos, cnts[pos[0]]) 69 | saved_p_id = checkpoint['optimizer']['param_groups'][pos[0]]['params'][cnts[pos[0]]] 70 | assert (checkpoint['model'][key].shape == 71 | checkpoint['optimizer']['state'][saved_p_id]['momentum_buffer'].shape), \ 72 | ('param and momentum_buffer shape mismatch in checkpoint.' 
73 | ' param_name: {}, param_id: {}'.format(key, saved_p_id)) 74 | param_groups_inds[pos[0]].append(pos[1]) 75 | cnts[pos[0]] += 1 76 | 77 | for cnt, param_inds in enumerate(param_groups_inds): 78 | ckpt_params = checkpoint['optimizer']['param_groups'][cnt]['params'] 79 | assert len(ckpt_params) == len(param_inds) 80 | ckpt_params = [x for x, _ in sorted(zip(ckpt_params, param_inds), key=lambda x: x[1])] 81 | checkpoint['optimizer']['param_groups'][cnt]['params'] = ckpt_params 82 | 83 | 84 | def load_optimizer_state_dict(optimizer, state_dict): 85 | # deepcopy, to be consistent with module API 86 | state_dict = deepcopy(state_dict) 87 | # Validate the state_dict 88 | groups = optimizer.param_groups 89 | saved_groups = state_dict['param_groups'] 90 | 91 | if len(groups) != len(saved_groups): 92 | raise ValueError("loaded state dict has a different number of " 93 | "parameter groups") 94 | param_lens = (len(g['params']) for g in groups) 95 | saved_lens = (len(g['params']) for g in saved_groups) 96 | if any(p_len != s_len for p_len, s_len in zip(param_lens, saved_lens)): 97 | raise ValueError("loaded state dict contains a parameter group " 98 | "that doesn't match the size of optimizer's group") 99 | 100 | # Update the state 101 | id_map = {old_id: p for old_id, p in 102 | zip(chain(*(g['params'] for g in saved_groups)), 103 | chain(*(g['params'] for g in groups)))} 104 | 105 | def cast(param, value): 106 | """Make a deep copy of value, casting all tensors to device of param.""" 107 | if torch.is_tensor(value): 108 | # Floating-point types are a bit special here. They are the only ones 109 | # that are assumed to always match the type of params. 
110 | if isinstance(param.data, (torch.FloatTensor, torch.cuda.FloatTensor, 111 | torch.DoubleTensor, torch.cuda.DoubleTensor, 112 | torch.HalfTensor, torch.cuda.HalfTensor)): # param.is_floating_point(): 113 | value = value.type_as(param.data) 114 | value = value.cuda(param.get_device()) if param.is_cuda else value.cpu() 115 | return value 116 | elif isinstance(value, dict): 117 | return {k: cast(param, v) for k, v in value.items()} 118 | elif isinstance(value, Iterable): 119 | return type(value)(cast(param, v) for v in value) 120 | else: 121 | return value 122 | 123 | # Copy state assigned to params (and cast tensors to appropriate types). 124 | # State that is not assigned to params is copied as is (needed for 125 | # backward compatibility). 126 | state = defaultdict(dict) 127 | for k, v in state_dict['state'].items(): 128 | if k in id_map: 129 | param = id_map[k] 130 | state[param] = cast(param, v) 131 | else: 132 | state[k] = v 133 | 134 | # Update parameter groups, setting their 'params' value 135 | def update_group(group, new_group): 136 | new_group['params'] = group['params'] 137 | return new_group 138 | param_groups = [ 139 | update_group(g, ng) for g, ng in zip(groups, saved_groups)] 140 | optimizer.__setstate__({'state': state, 'param_groups': param_groups}) 141 | -------------------------------------------------------------------------------- /lib/utils/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | 9 | from core.config import cfg 10 | import nn as mynn 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, beta=1.0): 16 | """ 17 | SmoothL1(x) = 0.5 * x^2 / beta if |x| < beta 18 | |x| - 0.5 * beta otherwise. 
19 | 1 / N * sum_i alpha_out[i] * SmoothL1(alpha_in[i] * (y_hat[i] - y[i])). 20 | N is the number of batch elements in the input predictions 21 | """ 22 | box_diff = bbox_pred - bbox_targets 23 | in_box_diff = bbox_inside_weights * box_diff 24 | abs_in_box_diff = torch.abs(in_box_diff) 25 | smoothL1_sign = (abs_in_box_diff < beta).detach().float() 26 | in_loss_box = smoothL1_sign * 0.5 * torch.pow(in_box_diff, 2) / beta + \ 27 | (1 - smoothL1_sign) * (abs_in_box_diff - (0.5 * beta)) 28 | out_loss_box = bbox_outside_weights * in_loss_box 29 | loss_box = out_loss_box 30 | N = loss_box.size(0) # batch size 31 | loss_box = loss_box.view(-1).sum(0) / N 32 | return loss_box 33 | 34 | 35 | def clip_gradient(model, clip_norm): 36 | """Computes a gradient clipping coefficient based on gradient norm.""" 37 | totalnorm = 0 38 | for p in model.parameters(): 39 | if p.requires_grad: 40 | modulenorm = p.grad.data.norm() 41 | totalnorm += modulenorm ** 2 42 | totalnorm = np.sqrt(totalnorm) 43 | 44 | norm = clip_norm / max(totalnorm, clip_norm) 45 | for p in model.parameters(): 46 | if p.requires_grad: 47 | p.grad.mul_(norm) 48 | 49 | 50 | def decay_learning_rate(optimizer, cur_lr, decay_rate): 51 | """Decay learning rate""" 52 | new_lr = cur_lr * decay_rate 53 | # ratio = _get_lr_change_ratio(cur_lr, new_lr) 54 | ratio = 1 / decay_rate 55 | if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD: 56 | logger.info('Changing learning rate %.6f -> %.6f', cur_lr, new_lr) 57 | # Update learning rate, note that different parameter may have different learning rate 58 | for param_group in optimizer.param_groups: 59 | cur_lr = param_group['lr'] 60 | new_lr = decay_rate * param_group['lr'] 61 | param_group['lr'] = new_lr 62 | if cfg.SOLVER.TYPE in ['SGD']: 63 | if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \ 64 | ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD: 65 | _CorrectMomentum(optimizer, param_group['params'], new_lr / cur_lr) 66 | 67 | def update_learning_rate(optimizer, cur_lr, new_lr): 68 
| """Update learning rate""" 69 | if cur_lr != new_lr: 70 | ratio = _get_lr_change_ratio(cur_lr, new_lr) 71 | if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD: 72 | logger.info('Changing learning rate %.6f -> %.6f', cur_lr, new_lr) 73 | # Update learning rate, note that different parameter may have different learning rate 74 | param_keys = [] 75 | for ind, param_group in enumerate(optimizer.param_groups): 76 | if ind == 1 and cfg.SOLVER.BIAS_DOUBLE_LR: # bias params 77 | param_group['lr'] = new_lr * 2 78 | else: 79 | param_group['lr'] = new_lr 80 | param_keys += param_group['params'] 81 | if cfg.SOLVER.TYPE in ['SGD'] and cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \ 82 | ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD: 83 | _CorrectMomentum(optimizer, param_keys, new_lr / cur_lr) 84 | 85 | 86 | def _CorrectMomentum(optimizer, param_keys, correction): 87 | """The MomentumSGDUpdate op implements the update V as 88 | 89 | V := mu * V + lr * grad, 90 | 91 | where mu is the momentum factor, lr is the learning rate, and grad is 92 | the stochastic gradient. Since V is not defined independently of the 93 | learning rate (as it should ideally be), when the learning rate is 94 | changed we should scale the update history V in order to make it 95 | compatible in scale with lr * grad. 
96 | """ 97 | logger.info('Scaling update history by %.6f (new lr / old lr)', correction) 98 | for p_key in param_keys: 99 | optimizer.state[p_key]['momentum_buffer'] *= correction 100 | 101 | 102 | def _get_lr_change_ratio(cur_lr, new_lr): 103 | eps = 1e-10 104 | ratio = np.max( 105 | (new_lr / np.max((cur_lr, eps)), cur_lr / np.max((new_lr, eps))) 106 | ) 107 | return ratio 108 | 109 | 110 | def affine_grid_gen(rois, input_size, grid_size): 111 | 112 | rois = rois.detach() 113 | x1 = rois[:, 1::4] / 16.0 114 | y1 = rois[:, 2::4] / 16.0 115 | x2 = rois[:, 3::4] / 16.0 116 | y2 = rois[:, 4::4] / 16.0 117 | 118 | height = input_size[0] 119 | width = input_size[1] 120 | 121 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 122 | theta = torch.cat([\ 123 | (x2 - x1) / (width - 1), 124 | zero, 125 | (x1 + x2 - width + 1) / (width - 1), 126 | zero, 127 | (y2 - y1) / (height - 1), 128 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 129 | 130 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, grid_size, grid_size))) 131 | 132 | return grid 133 | 134 | 135 | def save_ckpt(output_dir, args, model, optimizer): 136 | """Save checkpoint""" 137 | if args.no_save: 138 | return 139 | ckpt_dir = os.path.join(output_dir, 'ckpt') 140 | if not os.path.exists(ckpt_dir): 141 | os.makedirs(ckpt_dir) 142 | save_name = os.path.join(ckpt_dir, 'model_{}_{}.pth'.format(args.epoch, args.step)) 143 | if isinstance(model, mynn.DataParallel): 144 | model = model.module 145 | # TODO: (maybe) Do not save redundant shared params 146 | # model_state_dict = model.state_dict() 147 | torch.save({ 148 | 'epoch': args.epoch, 149 | 'step': args.step, 150 | 'iters_per_epoch': args.iters_per_epoch, 151 | 'model': model.state_dict(), 152 | 'optimizer': optimizer.state_dict()}, save_name) 153 | logger.info('save model: %s', save_name) 154 | 155 | 156 | def load_ckpt(model, ckpt): 157 | """Load checkpoint""" 158 | mapping, _ = model.detectron_weight_mapping 159 | state_dict = {} 160 
| for name in ckpt: 161 | if mapping[name]: 162 | state_dict[name] = ckpt[name] 163 | model.load_state_dict(state_dict, strict=False) 164 | 165 | 166 | def get_group_gn(dim): 167 | """ 168 | get number of groups used by GroupNorm, based on number of channels 169 | """ 170 | dim_per_gp = cfg.GROUP_NORM.DIM_PER_GP 171 | num_groups = cfg.GROUP_NORM.NUM_GROUPS 172 | 173 | assert dim_per_gp == -1 or num_groups == -1, \ 174 | "GroupNorm: can only specify G or C/G." 175 | 176 | if dim_per_gp > 0: 177 | assert dim % dim_per_gp == 0 178 | group_gn = dim // dim_per_gp 179 | else: 180 | assert dim % num_groups == 0 181 | group_gn = num_groups 182 | return group_gn 183 | -------------------------------------------------------------------------------- /lib/utils/net_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | import torchvision.models as models 7 | from core.config import cfg 8 | from model.roi_crop.functions.roi_crop import RoICropFunction 9 | import cv2 10 | import pdb 11 | import random 12 | 13 | def save_net(fname, net): 14 | import h5py 15 | h5f = h5py.File(fname, mode='w') 16 | for k, v in net.state_dict().items(): 17 | h5f.create_dataset(k, data=v.cpu().numpy()) 18 | 19 | def load_net(fname, net): 20 | import h5py 21 | h5f = h5py.File(fname, mode='r') 22 | for k, v in net.state_dict().items(): 23 | param = torch.from_numpy(np.asarray(h5f[k])) 24 | v.copy_(param) 25 | 26 | def weights_normal_init(model, dev=0.01): 27 | if isinstance(model, list): 28 | for m in model: 29 | weights_normal_init(m, dev) 30 | else: 31 | for m in model.modules(): 32 | if isinstance(m, nn.Conv2d): 33 | m.weight.data.normal_(0.0, dev) 34 | elif isinstance(m, nn.Linear): 35 | m.weight.data.normal_(0.0, dev) 36 | 37 | 38 | def _crop_pool_layer(bottom, rois, max_pool=True): 39 | # code modified from 40 | # 
https://github.com/ruotianluo/pytorch-faster-rcnn 41 | # implement it using stn 42 | # box to affine 43 | # input (x1,y1,x2,y2) 44 | """ 45 | [ x2-x1 x1 + x2 - W + 1 ] 46 | [ ----- 0 --------------- ] 47 | [ W - 1 W - 1 ] 48 | [ ] 49 | [ y2-y1 y1 + y2 - H + 1 ] 50 | [ 0 ----- --------------- ] 51 | [ H - 1 H - 1 ] 52 | """ 53 | rois = rois.detach() 54 | batch_size = bottom.size(0) 55 | D = bottom.size(1) 56 | H = bottom.size(2) 57 | W = bottom.size(3) 58 | roi_per_batch = rois.size(0) / batch_size 59 | x1 = rois[:, 1::4] / 16.0 60 | y1 = rois[:, 2::4] / 16.0 61 | x2 = rois[:, 3::4] / 16.0 62 | y2 = rois[:, 4::4] / 16.0 63 | 64 | height = bottom.size(2) 65 | width = bottom.size(3) 66 | 67 | # affine theta 68 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 69 | theta = torch.cat([\ 70 | (x2 - x1) / (width - 1), 71 | zero, 72 | (x1 + x2 - width + 1) / (width - 1), 73 | zero, 74 | (y2 - y1) / (height - 1), 75 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 76 | 77 | if max_pool: 78 | pre_pool_size = cfg.POOLING_SIZE * 2 79 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size))) 80 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\ 81 | .contiguous().view(-1, D, H, W) 82 | crops = F.grid_sample(bottom, grid) 83 | crops = F.max_pool2d(crops, 2, 2) 84 | else: 85 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, cfg.POOLING_SIZE, cfg.POOLING_SIZE))) 86 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\ 87 | .contiguous().view(-1, D, H, W) 88 | crops = F.grid_sample(bottom, grid) 89 | 90 | return crops, grid 91 | 92 | def _affine_grid_gen(rois, input_size, grid_size): 93 | 94 | rois = rois.detach() 95 | x1 = rois[:, 1::4] / 16.0 96 | y1 = rois[:, 2::4] / 16.0 97 | x2 = rois[:, 3::4] / 16.0 98 | y2 = rois[:, 4::4] / 16.0 99 | 100 | height = input_size[0] 101 | width = input_size[1] 102 | 103 | zero = 
Variable(rois.data.new(rois.size(0), 1).zero_()) 104 | theta = torch.cat([\ 105 | (x2 - x1) / (width - 1), 106 | zero, 107 | (x1 + x2 - width + 1) / (width - 1), 108 | zero, 109 | (y2 - y1) / (height - 1), 110 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 111 | 112 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, grid_size, grid_size))) 113 | 114 | return grid 115 | 116 | def _affine_theta(rois, input_size): 117 | 118 | rois = rois.detach() 119 | x1 = rois[:, 1::4] / 16.0 120 | y1 = rois[:, 2::4] / 16.0 121 | x2 = rois[:, 3::4] / 16.0 122 | y2 = rois[:, 4::4] / 16.0 123 | 124 | height = input_size[0] 125 | width = input_size[1] 126 | 127 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 128 | 129 | # theta = torch.cat([\ 130 | # (x2 - x1) / (width - 1), 131 | # zero, 132 | # (x1 + x2 - width + 1) / (width - 1), 133 | # zero, 134 | # (y2 - y1) / (height - 1), 135 | # (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 136 | 137 | theta = torch.cat([\ 138 | (y2 - y1) / (height - 1), 139 | zero, 140 | (y1 + y2 - height + 1) / (height - 1), 141 | zero, 142 | (x2 - x1) / (width - 1), 143 | (x1 + x2 - width + 1) / (width - 1)], 1).view(-1, 2, 3) 144 | 145 | return theta 146 | 147 | def compare_grid_sample(): 148 | # do gradcheck 149 | N = random.randint(1, 8) 150 | C = 2 # random.randint(1, 8) 151 | H = 5 # random.randint(1, 8) 152 | W = 4 # random.randint(1, 8) 153 | input = Variable(torch.randn(N, C, H, W).cuda(), requires_grad=True) 154 | input_p = input.clone().data.contiguous() 155 | 156 | grid = Variable(torch.randn(N, H, W, 2).cuda(), requires_grad=True) 157 | grid_clone = grid.clone().contiguous() 158 | 159 | out_offcial = F.grid_sample(input, grid) 160 | grad_outputs = Variable(torch.rand(out_offcial.size()).cuda()) 161 | grad_outputs_clone = grad_outputs.clone().contiguous() 162 | grad_inputs = torch.autograd.grad(out_offcial, (input, grid), grad_outputs.contiguous()) 163 | grad_input_off = grad_inputs[0] 164 | 165 | 166 | 
def load_pretrained_imagenet_weights(model):
    """Load ImageNet-pretrained backbone weights into `model`.

    Two formats are supported, chosen by the extension of
    cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS:
      * '.pkl'  — a Detectron-style pickled blob dict;
      * others  — a torchvision-style state dict, remapped via
                  `convert_state_dict`.

    Args:
        model: the generalized rcnn module to receive the weights.
    """
    _, ext = os.path.splitext(cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS)
    if ext == '.pkl':
        with open(cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS, 'rb') as fp:
            # NOTE(review): pickle.load is unsafe on untrusted files; weight
            # files are assumed to come from a trusted source here.
            src_blobs = pickle.load(fp, encoding='latin1')
        if 'blobs' in src_blobs:
            src_blobs = src_blobs['blobs']
        pretrained_state_dict = src_blobs
    else:
        weights_file = os.path.join(cfg.ROOT_DIR, cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS)
        pretrained_state_dict = convert_state_dict(torch.load(weights_file))

    # Convert batchnorm weights: fold the frozen BN statistics into the
    # affine scale/bias so AffineChannel2d reproduces the BN output.
    for name, mod in model.named_modules():
        if isinstance(mod, mynn.AffineChannel2d):
            # Strip the module-path prefix so the name matches the keys in
            # the pretrained state dict (one extra level when FPN wraps the
            # conv body).
            if cfg.FPN.FPN_ON:
                pretrained_name = name.split('.', 2)[-1]
            else:
                pretrained_name = name.split('.', 1)[-1]
            bn_mean = pretrained_state_dict[pretrained_name + '.running_mean']
            bn_var = pretrained_state_dict[pretrained_name + '.running_var']
            scale = pretrained_state_dict[pretrained_name + '.weight']
            bias = pretrained_state_dict[pretrained_name + '.bias']
            std = torch.sqrt(bn_var + 1e-5)
            new_scale = scale / std
            new_bias = bias - bn_mean * scale / std
            pretrained_state_dict[pretrained_name + '.weight'] = new_scale
            pretrained_state_dict[pretrained_name + '.bias'] = new_bias

    model_state_dict = model.state_dict()

    pattern = dwh.resnet_weights_name_pattern()

    name_mapping, _ = model.detectron_weight_mapping

    for k, v in name_mapping.items():
        if isinstance(v, str):  # maybe a str, None or True
            if pattern.match(v):
                if cfg.FPN.FPN_ON:
                    pretrained_key = k.split('.', 2)[-1]
                else:
                    pretrained_key = k.split('.', 1)[-1]
                # Detectron pkl blobs are keyed by the detectron name `v`;
                # torchvision dicts by the stripped module path.
                if ext == '.pkl':
                    model_state_dict[k].copy_(torch.Tensor(pretrained_state_dict[v]))
                else:
                    model_state_dict[k].copy_(pretrained_state_dict[pretrained_key])
def convert_state_dict(src_dict):
    """Remap torchvision ResNet parameter names to our conv_body/box_head names.

    'layerN.*' becomes 'res{N+1}.*' (the stem is 'res1'), stem parameters
    ('conv1', 'bn1', ...) are prefixed with 'res1.', and the ImageNet
    classifier 'fc.*' weights are dropped.
    """
    dst_dict = {}
    for key, value in src_dict.items():
        if key.startswith('fc'):
            # The ImageNet classification head is unused in detection.
            continue
        parts = key.split('.')
        if key.startswith('layer'):
            # 'layerN' -> 'res{N+1}' since 'res1' is reserved for the stem.
            assert len(parts[0]) == 6
            stage = int(parts[0][5]) + 1
            dst_dict['.'.join(['res%d' % stage] + parts[1:])] = value
        else:
            dst_dict['.'.join(['res1'] + parts)] = value
    return dst_dict
def process_in_parallel(
        tag, total_range_size, binary, output_dir,
        load_ckpt, load_detectron, opts=''):
    """Run the specified binary NUM_GPUS times in parallel, each time as a
    subprocess that uses one GPU. The binary must accept the command line
    arguments `--range {start} {end}` that specify a data processing range.

    Args:
        tag: short string used to name the per-range config/stdout/result files.
        total_range_size: total number of items split across the subprocesses.
        binary: path of the python script to run in each subprocess.
        output_dir: directory receiving the config snapshot, logs and results.
        load_ckpt: checkpoint path forwarded to the binary, or None.
        load_detectron: detectron weights path forwarded to the binary, or None.
        opts: iterable of extra config override tokens passed via `--set`.

    Returns:
        List of unpickled per-range results, ordered by range start.
    """
    # Snapshot the current cfg state in order to pass to the inference
    # subprocesses
    cfg_file = os.path.join(output_dir, '{}_range_config.yaml'.format(tag))
    with open(cfg_file, 'w') as f:
        yaml.dump(cfg, stream=f)
    subprocess_env = os.environ.copy()
    processes = []
    NUM_GPUS = torch.cuda.device_count()
    # Split [0, total_range_size) into NUM_GPUS contiguous sub-ranges.
    subinds = np.array_split(range(total_range_size), NUM_GPUS)
    # Determine GPUs to use
    cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
    if cuda_visible_devices:
        gpu_inds = list(map(int, cuda_visible_devices.split(',')))
        assert -1 not in gpu_inds, \
            'Hiding GPU indices using the \'-1\' index is not supported'
    else:
        gpu_inds = range(cfg.NUM_GPUS)
    gpu_inds = list(gpu_inds)
    # Run the binary in cfg.NUM_GPUS subprocesses
    for i, gpu_ind in enumerate(gpu_inds):
        start = subinds[i][0]
        end = subinds[i][-1] + 1
        # Restrict each child process to a single GPU.
        subprocess_env['CUDA_VISIBLE_DEVICES'] = str(gpu_ind)
        cmd = ('python {binary} --range {start} {end} --cfg {cfg_file} --set {opts} '
               '--output_dir {output_dir}')
        if load_ckpt is not None:
            cmd += ' --load_ckpt {load_ckpt}'
        elif load_detectron is not None:
            cmd += ' --load_detectron {load_detectron}'
        cmd = cmd.format(
            binary=shlex_quote(binary),
            start=int(start),
            end=int(end),
            cfg_file=shlex_quote(cfg_file),
            output_dir=output_dir,
            load_ckpt=load_ckpt,
            load_detectron=load_detectron,
            opts=' '.join([shlex_quote(opt) for opt in opts])
        )
        logger.info('{} range command {}: {}'.format(tag, i, cmd))
        if i == 0:
            # The first subprocess is piped so its output can be streamed
            # live by log_subprocess_output; the rest write to files.
            subprocess_stdout = subprocess.PIPE
        else:
            filename = os.path.join(
                output_dir, '%s_range_%s_%s.stdout' % (tag, start, end)
            )
            subprocess_stdout = open(filename, 'w')
        p = subprocess.Popen(
            cmd,
            shell=True,
            env=subprocess_env,
            stdout=subprocess_stdout,
            stderr=subprocess.STDOUT,
            bufsize=1
        )
        processes.append((i, p, start, end, subprocess_stdout))
    # Log output from inference processes and collate their results
    outputs = []
    for i, p, start, end, subprocess_stdout in processes:
        log_subprocess_output(i, p, output_dir, tag, start, end)
        if isinstance(subprocess_stdout, IOBase):
            subprocess_stdout.close()
        # Each subprocess is expected to have pickled its results here.
        range_file = os.path.join(
            output_dir, '%s_range_%s_%s.pkl' % (tag, start, end)
        )
        range_data = pickle.load(open(range_file, 'rb'))
        outputs.append(range_data)
    return outputs
def log_subprocess_output(i, p, output_dir, tag, start, end):
    """Capture the output of each subprocess and log it in the parent process.

    The first subprocess (i == 0) is streamed in realtime and teed to its
    stdout file; the others are assumed to have written their own files, which
    are dumped in order once each process finishes.
    """
    outfile = os.path.join(
        output_dir, '%s_range_%s_%s.stdout' % (tag, start, end)
    )
    banner = '# ' + '-' * 76 + ' #'
    logger.info(banner)
    logger.info(
        'stdout of subprocess %s with range [%s, %s]' % (i, start + 1, end)
    )
    logger.info(banner)
    if i == 0:
        # Stream the piped stdout from the first subprocess in realtime,
        # echoing each line and saving a copy to disk.
        with open(outfile, 'w') as f:
            line = p.stdout.readline()
            while line != b'':
                print(line.rstrip().decode('ascii'))
                f.write(str(line, encoding='ascii'))
                line = p.stdout.readline()
        p.stdout.close()
        ret = p.wait()
    else:
        # For subprocesses >= 1, wait for completion and dump their log file.
        ret = p.wait()
        with open(outfile, 'r') as f:
            print(f.read())
    assert ret == 0, 'Range subprocess failed (exit code: {})'.format(ret)
class Timer(object):
    """A simple wall-clock timer that accumulates time across tic/toc pairs."""

    def __init__(self):
        self.reset()

    def tic(self):
        # time.time is used rather than time.clock because time.clock does
        # not normalize for multithreading.
        self.start_time = time.time()

    def toc(self, average=True):
        """Stop timing; return the running average (default) or last interval."""
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff

    def reset(self):
        """Zero out all accumulated statistics."""
        self.calls = 0
        self.total_time = self.start_time = self.diff = self.average_time = 0.
def add_path(path):
    """Insert `path` at the front of sys.path if it is not already listed."""
    if path in sys.path:
        return
    sys.path.insert(0, path)

# Project root: the parent of the directory containing this script (tools/).
this_dir = osp.abspath(osp.dirname(osp.dirname(__file__)))

# Add lib to PYTHONPATH
lib_path = osp.join(this_dir, 'lib')
add_path(lib_path)
def get_confirm_token(response):
    """Return Google Drive's download-confirmation token, or None.

    Large files trigger a "can't scan for viruses" interstitial; the token
    required to proceed is delivered in a cookie whose name starts with
    'download_warning'.
    """
    return next(
        (value for key, value in response.cookies.items()
         if key.startswith('download_warning')),
        None,
    )
def main():
    """Download each requested pretrained weight file from Google Drive."""
    init()  # colorama init. Only has effect on Windows
    args = parse_args()
    # Create the output directory once, before the loop (it is loop-invariant)
    # and with exist_ok=True to avoid the exists()/makedirs() race.
    os.makedirs(args.output_dir, exist_ok=True)
    for filename in args.targets:
        file_id = PRETRAINED_WEIGHTS[filename]
        destination = os.path.join(args.output_dir, filename)
        download_file_from_google_drive(file_id, destination)
        print('Download {} to {}'.format(filename, destination))
def parse_args():
    """Parse command line arguments for the mask-rcnn demo script."""
    parser = argparse.ArgumentParser(description='Demonstrate mask-rcnn results')
    parser.add_argument(
        '--dataset', required=True,
        help='training dataset')

    parser.add_argument(
        '--cfg', dest='cfg_file', required=True,
        help='optional config file')
    parser.add_argument(
        '--set', dest='set_cfgs',
        help='set config keys, will overwrite config in the cfg_file',
        default=[], nargs='+')

    # action='store_false' means args.cuda defaults to True and is switched
    # off by passing --no_cuda.
    parser.add_argument(
        '--no_cuda', dest='cuda', help='whether use CUDA', action='store_false')

    parser.add_argument('--load_ckpt', help='path of checkpoint to load')
    parser.add_argument(
        '--load_detectron', help='path to the detectron weight pickle file')

    # Exactly one of --image_dir / --images is expected (checked in main()).
    parser.add_argument(
        '--image_dir',
        help='directory to load images for demo')
    parser.add_argument(
        '--images', nargs='+',
        help='images to infer. Must not use with --image_dir')
    parser.add_argument(
        '--output_dir',
        help='directory to save demo results',
        default="infer_outputs")
    # strtobool maps 'y/yes/true/1' -> 1 and 'n/no/false/0' -> 0.
    parser.add_argument(
        '--merge_pdfs', type=distutils.util.strtobool, default=True)

    args = parser.parse_args()

    return args
def main():
    """Run Mask R-CNN inference on the given images and save visualizations."""

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    # Exactly one image source must be given: a directory or an explicit list.
    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        # map_location keeps loaded tensors on CPU; the model was already
        # moved to the GPU above.
        checkpoint = torch.load(load_name, map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN, cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True, device_ids=[0])  # only support single GPU

    maskRCNN.eval()
    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    for i in xrange(num_images):
        print('img', i)
        im = cv2.imread(imglist[i])
        assert im is not None

        timers = defaultdict(Timer)

        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN, im, timers=timers)

        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))
        vis_utils.vis_one_image(
            im[:, :, ::-1],  # BGR -> RGB for visualization
            im_name,
            args.output_dir,
            cls_boxes,
            cls_segms,
            cls_keyps,
            dataset=dataset,
            box_alpha=0.3,
            show_class=True,
            thresh=0.7,
            kp_thresh=2
        )

    if args.merge_pdfs and num_images > 1:
        # Concatenate all per-image PDFs into a single results.pdf using the
        # external `pdfunite` tool (assumed to be on PATH).
        merge_out_path = '{}/results.pdf'.format(args.output_dir)
        if os.path.exists(merge_out_path):
            os.remove(merge_out_path)
        command = "pdfunite {}/*.pdf {}".format(args.output_dir,
                                                merge_out_path)
        subprocess.call(command, shell=True)
def parse_args():
    """Build the command line parser for the test script and parse sys.argv."""
    ap = argparse.ArgumentParser(description='Test a Fast R-CNN network')
    ap.add_argument(
        '--dataset',
        help='training dataset')
    ap.add_argument(
        '--num_classes', dest='num_classes',
        help='Number of classes in your custom dataset',
        default=None, type=int)
    ap.add_argument(
        '--cfg', dest='cfg_file', required=True,
        help='optional config file')
    ap.add_argument(
        '--load_ckpt', help='path of checkpoint to load')
    ap.add_argument(
        '--load_detectron', help='path to the detectron weight pickle file')
    ap.add_argument(
        '--output_dir',
        help='output directory to save the testing results. If not provided, '
             'defaults to [args.load_ckpt|args.load_detectron]/../test.')
    ap.add_argument(
        '--set', dest='set_cfgs',
        help='set config keys, will overwrite config in the cfg_file.'
             ' See lib/core/config.py for all options',
        default=[], nargs='*')
    ap.add_argument(
        '--range',
        help='start (inclusive) and end (exclusive) indices',
        type=int, nargs=2)
    ap.add_argument(
        '--multi-gpu-testing', help='using multiple gpus for inference',
        action='store_true')
    ap.add_argument(
        '--vis', dest='vis', help='visualize detections', action='store_true')
    return ap.parse_args()
if __name__ == '__main__':

    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    logger = utils.logging.setup_logging(__name__)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)

    # Either a single visible GPU without --multi-gpu-testing, or several
    # GPUs with it -- never both, never neither.
    assert (torch.cuda.device_count() == 1) ^ bool(args.multi_gpu_testing)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    if args.output_dir is None:
        # Default the output dir to <weights_dir>/../test.
        ckpt_path = args.load_ckpt if args.load_ckpt else args.load_detectron
        args.output_dir = os.path.join(
            os.path.dirname(os.path.dirname(ckpt_path)), 'test')
        logger.info('Automatically set output directory to %s', args.output_dir)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    cfg.VIS = args.vis

    if args.dataset == "custom_dataset" and args.num_classes is None:
        raise ValueError("Need number of classes in your custom dataset to run!")

    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        merge_cfg_from_list(args.set_cfgs)

    # Map the dataset name to its test split and class count in the global cfg.
    if args.dataset == "coco2017":
        cfg.TEST.DATASETS = ('coco_2017_val',)
        cfg.MODEL.NUM_CLASSES = 81
    elif args.dataset == "keypoints_coco2017":
        cfg.TEST.DATASETS = ('keypoints_coco_2017_val',)
        cfg.MODEL.NUM_CLASSES = 2
    elif args.dataset == "voc2007":
        cfg.TEST.DATASETS = ('voc_2007_test',)
        cfg.MODEL.NUM_CLASSES = 21
    elif args.dataset == "custom_dataset":
        cfg.TEST.DATASETS = ('custom_data_test',)
        cfg.MODEL.NUM_CLASSES = args.num_classes
    else:  # For subprocess call
        assert cfg.TEST.DATASETS, 'cfg.TEST.DATASETS shouldn\'t be empty'
    assert_and_infer_cfg()

    logger.info('Testing with config:')
    logger.info(pprint.pformat(cfg))

    # For test_engine.multi_gpu_test_net_on_dataset
    args.test_net_file, _ = os.path.splitext(__file__)
    # manually set args.cuda
    args.cuda = True

    run_inference(
        args,
        ind_range=args.range,
        multi_gpu_testing=args.multi_gpu_testing,
        check_expected_results=True)