├── .github └── issue_template.md ├── .gitignore ├── .pylintrc ├── .travis.yml ├── .vscode └── settings.json ├── BENCHMARK.md ├── LICENSE ├── README.md ├── configs ├── baselines │ ├── e2e_faster_rcnn_R-101-FPN_1x.yaml │ ├── e2e_faster_rcnn_R-101-FPN_2x.yaml │ ├── e2e_faster_rcnn_R-50-C4_1x.yaml │ ├── e2e_faster_rcnn_R-50-C4_2x.yaml │ ├── e2e_faster_rcnn_R-50-FPN_1x.yaml │ ├── e2e_faster_rcnn_R-50-FPN_2x.yaml │ ├── e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml │ ├── e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml │ ├── e2e_keypoint_rcnn_R-101-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_R-101-FPN_s1x.yaml │ ├── e2e_keypoint_rcnn_R-50-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_R-50-FPN_s1x.yaml │ ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml │ ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml │ ├── e2e_mask_rcnn_R-101-C4_2x.yaml │ ├── e2e_mask_rcnn_R-101-FPN_1x.yaml │ ├── e2e_mask_rcnn_R-101-FPN_2x.yaml │ ├── e2e_mask_rcnn_R-50-C4_1x.yaml │ ├── e2e_mask_rcnn_R-50-C4_2x.yaml │ ├── e2e_mask_rcnn_R-50-FPN_1x.yaml │ ├── e2e_mask_rcnn_R-50-FPN_2x.yaml │ ├── e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml │ ├── e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml │ └── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml ├── getting_started │ ├── tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml │ ├── tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml │ ├── tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml │ └── tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml └── gn_baselines │ ├── e2e_mask_rcnn_R-101-FPN_2x_gn.yaml │ ├── e2e_mask_rcnn_R-101-FPN_3x_gn.yaml │ ├── e2e_mask_rcnn_R-50-FPN_2x_gn.yaml │ ├── e2e_mask_rcnn_R-50-FPN_3x_gn.yaml │ ├── scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml │ └── scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml ├── demo ├── 
33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg ├── 33823288584_1d21cf0a26_k-detectron-R50-C4.jpg ├── 33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg ├── 33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg ├── 33823288584_1d21cf0a26_k.jpg ├── convert_pdf2img.sh ├── e2e_mask_rcnn_R-50-C4 │ └── train_from_scratch_epoch1_bs4 │ │ ├── img1.jpg │ │ ├── img2.jpg │ │ ├── img3.jpg │ │ └── img4.jpg ├── img1_keypoints-detectron-R50-FPN.jpg ├── img1_keypoints-pydetectron-R50-FPN.jpg ├── img2_keypoints-detectron-R50-FPN.jpg ├── img2_keypoints-pydetectron-R50-FPN.jpg ├── loss_cmp_of_e2e_faster_rcnn_R-50-FPN_1x.jpg ├── loss_cmp_of_e2e_keypoint_rcnn_R-50-FPN_1x.jpg ├── loss_cmp_of_e2e_mask_rcnn_R-50-FPN_1x.jpg ├── loss_e2e_keypoint_rcnn_R-50-FPN_1x_bs8.jpg ├── loss_e2e_mask_rcnn_R-50-FPN_1x_bs16.jpg ├── loss_e2e_mask_rcnn_R-50-FPN_1x_bs6.jpg ├── sample_images │ ├── img1.jpg │ ├── img2.jpg │ ├── img3.jpg │ └── img4.jpg └── sample_images_keypoints │ ├── img1_keypoints.jpg │ └── img2_keypoints.jpg ├── lib ├── core │ ├── __init__.py │ ├── config.py │ ├── test.py │ └── test_engine.py ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── cityscapes │ │ ├── __init__.py │ │ ├── coco_to_cityscapes_id.py │ │ └── tools │ │ │ ├── convert_cityscapes_to_coco.py │ │ │ └── convert_coco_model_to_cityscapes.py │ ├── cityscapes_json_dataset_evaluator.py │ ├── dataset_catalog.py │ ├── dummy_datasets.py │ ├── json_dataset.py │ ├── json_dataset_evaluator.py │ ├── roidb.py │ ├── task_evaluation.py │ ├── voc_dataset_evaluator.py │ └── voc_eval.py ├── make.sh ├── model │ ├── __init__.py │ ├── nms │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── nms │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── make.sh │ │ ├── nms_gpu.py │ │ ├── nms_kernel.cu │ │ ├── nms_wrapper.py │ │ └── src │ │ │ ├── nms_cuda.c │ │ │ ├── nms_cuda.h │ │ │ ├── nms_cuda_kernel.cu │ │ │ └── nms_cuda_kernel.h │ ├── roi_align │ │ ├── 
__init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_align │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── make.sh │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.c │ │ │ ├── roi_align_cuda.h │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel.h │ ├── roi_crop │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ ├── crop_resize │ │ │ │ ├── __init__.py │ │ │ │ └── _crop_resize.so │ │ │ └── roi_crop │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ ├── crop_resize.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ ├── make.sh │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ └── src │ │ │ ├── roi_crop.c │ │ │ ├── roi_crop.h │ │ │ ├── roi_crop_cuda.c │ │ │ ├── roi_crop_cuda.h │ │ │ ├── roi_crop_cuda_kernel.cu │ │ │ └── roi_crop_cuda_kernel.h │ ├── roi_pooling │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_pooling │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ └── src │ │ │ ├── roi_pooling.c │ │ │ ├── roi_pooling.h │ │ │ ├── roi_pooling_cuda.c │ │ │ ├── roi_pooling_cuda.h │ │ │ ├── roi_pooling_kernel.cu │ │ │ └── roi_pooling_kernel.h │ └── utils │ │ ├── .gitignore │ │ ├── __init__.py │ │ └── net_utils.py ├── modeling │ ├── FPN.py │ ├── ResNet.py │ ├── __init__.py │ ├── collect_and_distribute_fpn_rpn_proposals.py │ ├── fast_rcnn_heads.py │ ├── generate_anchors.py │ ├── generate_proposal_labels.py │ ├── generate_proposals.py │ ├── keypoint_rcnn_heads.py │ ├── mask_rcnn_heads.py │ ├── model_builder.py │ ├── roi_xfrom │ │ ├── __init__.py │ │ └── roi_align │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_align │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── 
roi_align.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ │ └── src │ │ │ ├── roi_align_cuda.c │ │ │ ├── roi_align_cuda.h │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel.h │ └── rpn_heads.py ├── nn │ ├── __init__.py │ ├── functional.py │ ├── init.py │ ├── modules │ │ ├── __init__.py │ │ ├── affine.py │ │ ├── normalization.py │ │ └── upsample.py │ └── parallel │ │ ├── __init__.py │ │ ├── _functions.py │ │ ├── data_parallel.py │ │ ├── parallel_apply.py │ │ ├── replicate.py │ │ └── scatter_gather.py ├── roi_data │ ├── __init__.py │ ├── data_utils.py │ ├── fast_rcnn.py │ ├── keypoint_rcnn.py │ ├── loader.py │ ├── mask_rcnn.py │ ├── minibatch.py │ └── rpn.py ├── setup.py └── utils │ ├── __init__.py │ ├── blob.py │ ├── boxes.py │ ├── collections.py │ ├── colormap.py │ ├── cython_bbox.c │ ├── cython_bbox.pyx │ ├── cython_nms.c │ ├── cython_nms.pyx │ ├── detectron_weight_helper.py │ ├── env.py │ ├── fpn.py │ ├── image.py │ ├── io.py │ ├── keypoints.py │ ├── logging.py │ ├── misc.py │ ├── net.py │ ├── resnet_weights_helper.py │ ├── segms.py │ ├── subprocess.py │ ├── timer.py │ ├── training_stats.py │ └── vis.py └── tools ├── _init_paths.py ├── download_imagenet_weights.py ├── infer_simple.py ├── test_net.py ├── train_net.py └── train_net_step.py /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | ## PLEASE FOLLOW THESE INSTRUCTIONS BEFORE POSTING 2 | 1. **Read the README.md thoroughly ! README.md is not a decoration.** 3 | 2. Please search existing *open and closed* issues in case your issue has already been reported 4 | 3. Please try to debug the issue in case you can solve it on your own before posting 5 | 6 | ## After following steps above and agreeing to provide the detailed information requested below, you may continue with posting your issue 7 | (**Delete this line and the text above it.**) 8 | 9 | ### Expected results 10 | 11 | What did you expect to see? 
12 | 13 | ### Actual results 14 | 15 | What did you observe instead? 16 | 17 | ### Detailed steps to reproduce 18 | 19 | E.g.: 20 | 21 | ``` 22 | The command that you ran 23 | ``` 24 | 25 | ### System information 26 | 27 | * Operating system: ? 28 | * CUDA version: ? 29 | * cuDNN version: ? 30 | * GPU models (for all devices if they are not all the same): ? 31 | * python version: ? 32 | * pytorch version: ? 33 | * Anything else that seems relevant: ? 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | *.pyc 3 | *~ 4 | 5 | *.o 6 | *.so 7 | 8 | .ipynb_checkpoints 9 | notebooks/*.pkl 10 | 11 | /Outputs 12 | 13 | # ------------------------------ 14 | 15 | .vscode/* 16 | !.vscode/settings.json 17 | !.vscode/tasks.json 18 | !.vscode/launch.json 19 | !.vscode/extensions.json 20 | 21 | # General 22 | .DS_Store 23 | .AppleDouble 24 | .LSOverride 25 | 26 | # Icon must end with two \r 27 | Icon 28 | 29 | # Thumbnails 30 | ._* 31 | 32 | # Files that might appear in the root of a volume 33 | .DocumentRevisions-V100 34 | .fseventsd 35 | .Spotlight-V100 36 | .TemporaryItems 37 | .Trashes 38 | .VolumeIcon.icns 39 | .com.apple.timemachine.donotpresent 40 | 41 | # Directories potentially created on remote AFP share 42 | .AppleDB 43 | .AppleDesktop 44 | Network Trash Folder 45 | Temporary Items 46 | .apdisk 47 | 48 | *~ 49 | 50 | # temporary files which can be created if a process still has a handle open of a deleted file 51 | .fuse_hidden* 52 | 53 | # KDE directory preferences 54 | .directory 55 | 56 | # Linux trash folder which might appear on any partition or disk 57 | .Trash-* 58 | 59 | # .nfs files are created when an open file is removed but is still being accessed 60 | .nfs* 61 | -------------------------------------------------------------------------------- /.pylintrc: 
-------------------------------------------------------------------------------- 1 | [MASTER] 2 | extension-pkg-whitelist=numpy,torch,cv2 3 | init-hook="sys.path.insert(0, './tools'); import _init_paths" 4 | 5 | [MESSAGES CONTROL] 6 | disable=wrong-import-position 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | group: travis_latest 2 | language: python 3 | cache: pip 4 | python: 5 | - 3.6 6 | #- nightly 7 | #- pypy3 8 | matrix: 9 | allow_failures: 10 | - python: nightly 11 | - python: pypy3 12 | install: 13 | #- pip install -r requirements.txt 14 | - pip install flake8 # pytest # add another testing frameworks later 15 | before_script: 16 | # stop the build if there are Python syntax errors or undefined names 17 | - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics 18 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 19 | - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 20 | script: 21 | - true # pytest --capture=sys # add other tests here 22 | notifications: 23 | on_success: change 24 | on_failure: change # `always` will be the setting once code changes slow down 25 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintEnabled": true, 3 | "python.linting.flake8Enabled": false, 4 | "python.autoComplete.extraPaths": ["${workspaceRoot}/lib"], 5 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Roy Tseng 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | 36 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 
512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | RPN: 17 | SIZES: (32, 64, 128, 256, 512) 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | TRAIN: 22 | SCALES: (800,) 23 | MAX_SIZE: 1333 24 | IMS_PER_BATCH: 1 25 | BATCH_SIZE_PER_IM: 512 26 | TEST: 27 | SCALE: 800 28 | MAX_SIZE: 1333 29 | NMS: 0.5 30 | RPN_PRE_NMS_TOP_N: 6000 31 | RPN_POST_NMS_TOP_N: 1000 32 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 14 | MAX_ITER: 360000 15 | STEPS: [0, 240000, 320000] 16 | RPN: 17 | SIZES: (32, 64, 128, 256, 512) 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | TRAIN: 22 | SCALES: (800,) 23 | 
MAX_SIZE: 1333 24 | IMS_PER_BATCH: 1 25 | BATCH_SIZE_PER_IM: 512 26 | TEST: 27 | SCALE: 800 28 | MAX_SIZE: 1333 29 | NMS: 0.5 30 | RPN_PRE_NMS_TOP_N: 6000 31 | RPN_POST_NMS_TOP_N: 1000 32 | 33 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: 
fast_rcnn_heads.roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | TRAIN: 25 | SCALES: (800,) 26 | MAX_SIZE: 1333 27 | BATCH_SIZE_PER_IM: 512 28 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 29 | TEST: 30 | SCALE: 800 31 | MAX_SIZE: 1333 32 | NMS: 0.5 33 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 34 | RPN_POST_NMS_TOP_N: 1000 35 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: 
FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 360000 13 | STEPS: [0, 240000, 320000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 
26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 360000 13 | STEPS: [0, 240000, 320000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 35 | TEST: 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 1000 41 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: 
FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 130000 15 | STEPS: [0, 100000, 120000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | 
FAST_RCNN: 21 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * 
UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 130000 15 | STEPS: [0, 100000, 120000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | KRCNN: 26 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 27 | NUM_STACKED_CONVS: 8 28 | NUM_KEYPOINTS: 17 29 | USE_DECONV_OUTPUT: True 30 | CONV_INIT: MSRAFill 31 | CONV_HEAD_DIM: 512 32 | UP_SCALE: 2 33 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 36 | ROI_XFORM_SAMPLING_RATIO: 2 37 | KEYPOINT_CONFIDENCE: bbox 38 | TRAIN: 39 | SCALES: (640, 672, 704, 736, 768, 800) 40 | MAX_SIZE: 1333 41 | BATCH_SIZE_PER_IM: 512 42 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 43 | TEST: 44 | SCALE: 800 45 | MAX_SIZE: 1333 46 | NMS: 0.5 47 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 48 | 
RPN_POST_NMS_TOP_N: 1000 49 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | 
MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 130000 13 | STEPS: [0, 100000, 120000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 
90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 130000 13 | STEPS: [0, 100000, 120000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 20 | STRIDE_1X1: False # default True for MSRA; 
False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | SCALES: (640, 672, 704, 736, 768, 800) 44 | MAX_SIZE: 1333 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-101-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet101_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 360000 16 | STEPS: [0, 240000, 320000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: 
MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | RESNETS: 8 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 9 | NUM_GPUS: 8 10 | SOLVER: 11 | WEIGHT_DECAY: 0.0001 12 | LR_POLICY: steps_with_decay 13 | BASE_LR: 0.02 14 | GAMMA: 0.1 15 | MAX_ITER: 90000 16 | STEPS: [0, 60000, 80000] 17 | FPN: 18 | FPN_ON: True 19 | MULTILEVEL_ROIS: True 20 | MULTILEVEL_RPN: True 21 | FAST_RCNN: 22 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 23 | ROI_XFORM_METHOD: RoIAlign 24 | ROI_XFORM_RESOLUTION: 7 25 | ROI_XFORM_SAMPLING_RATIO: 2 26 | MRCNN: 27 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 28 | RESOLUTION: 28 # (output mask resolution) default 14 29 | ROI_XFORM_METHOD: RoIAlign 30 | ROI_XFORM_RESOLUTION: 14 # default 7 31 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 32 | DILATION: 1 # default 2 33 | CONV_INIT: MSRAFill # default GaussianFill 34 | TRAIN: 35 | SCALES: (800,) 36 | MAX_SIZE: 1333 37 | BATCH_SIZE_PER_IM: 512 38 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 39 | TEST: 40 | SCALE: 800 41 | MAX_SIZE: 1333 42 | NMS: 0.5 43 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 44 | RPN_POST_NMS_TOP_N: 1000 45 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: 
FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet101_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 180000 16 | STEPS: [0, 120000, 160000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: 
mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.ResNet50_conv4_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 15 | MAX_ITER: 360000 16 | STEPS: [0, 240000, 320000] 17 | RPN: 18 | SIZES: (32, 64, 128, 256, 512) 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: ResNet.ResNet_roi_conv5_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | MRCNN: 23 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 24 | RESOLUTION: 14 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 14 27 | DILATION: 1 # default 2 28 | CONV_INIT: MSRAFill # default: GaussianFill 29 | TRAIN: 30 | SCALES: (800,) 31 | MAX_SIZE: 1333 32 | IMS_PER_BATCH: 1 33 | BATCH_SIZE_PER_IM: 512 34 | TEST: 35 | SCALE: 800 36 | MAX_SIZE: 1333 37 | NMS: 0.5 38 | RPN_PRE_NMS_TOP_N: 6000 39 | RPN_POST_NMS_TOP_N: 1000 40 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 
5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 90000 15 | STEPS: [0, 60000, 80000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | RESNETS: 7 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 8 | NUM_GPUS: 8 9 | SOLVER: 10 | WEIGHT_DECAY: 0.0001 11 | LR_POLICY: steps_with_decay 12 | BASE_LR: 0.02 13 | GAMMA: 0.1 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | FAST_RCNN: 21 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 22 | ROI_XFORM_METHOD: RoIAlign 23 | ROI_XFORM_RESOLUTION: 7 24 | ROI_XFORM_SAMPLING_RATIO: 2 25 | MRCNN: 26 | ROI_MASK_HEAD: 
mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 27 | RESOLUTION: 28 # (output mask resolution) default 14 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 14 # default 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 31 | DILATION: 1 # default 2 32 | CONV_INIT: MSRAFill # default GaussianFill 33 | TRAIN: 34 | SCALES: (800,) 35 | MAX_SIZE: 1333 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 32 24 | WIDTH_PER_GROUP: 8 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | 
MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-32x8d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 32 24 | WIDTH_PER_GROUP: 8 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | 
-------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 64 24 | WIDTH_PER_GROUP: 4 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: 
True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-101-64x4d.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 64 24 | WIDTH_PER_GROUP: 4 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet152_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | # 1.44x schedule (note TRAIN.IMS_PER_BATCH: 1) 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | MAX_ITER: 260000 15 | STEPS: [0, 200000, 240000] 16 | FPN: 17 | FPN_ON: True 18 | 
MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | RESNETS: 21 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/X-152-32x8d-IN5k.pkl' 22 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 23 | TRANS_FUNC: bottleneck_transformation 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (640, 672, 704, 736, 768, 800) # Scale jitter 41 | MAX_SIZE: 1333 42 | IMS_PER_BATCH: 1 43 | BATCH_SIZE_PER_IM: 512 44 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 45 | TEST: 46 | SCALE: 800 47 | MAX_SIZE: 1333 48 | NMS: 0.5 49 | BBOX_VOTE: 50 | ENABLED: True 51 | VOTE_TH: 0.9 52 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 53 | RPN_POST_NMS_TOP_N: 1000 54 | BBOX_AUG: 55 | ENABLED: True 56 | SCORE_HEUR: UNION 57 | COORD_HEUR: UNION 58 | H_FLIP: True 59 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 60 | MAX_SIZE: 2000 61 | SCALE_H_FLIP: True 62 | SCALE_SIZE_DEP: False 63 | ASPECT_RATIOS: () 64 | ASPECT_RATIO_H_FLIP: False 65 | MASK_AUG: 66 | ENABLED: True 67 | HEUR: SOFT_AVG 68 | H_FLIP: True 69 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 70 | MAX_SIZE: 2000 71 | SCALE_H_FLIP: True 72 | SCALE_SIZE_DEP: False 73 | ASPECT_RATIOS: () 74 | ASPECT_RATIO_H_FLIP: False 75 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: 
FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 1 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.0025 12 | GAMMA: 0.1 13 | MAX_ITER: 60000 14 | STEPS: [0, 30000, 40000] 15 | # Equivalent schedules with... 16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 2 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.005 12 | GAMMA: 0.1 13 | MAX_ITER: 30000 14 | STEPS: [0, 15000, 20000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | 53 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 4 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 15000 14 | STEPS: [0, 7500, 10000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | RESNETS: 6 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth' 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 7500 14 | STEPS: [0, 3750, 5000] 15 | # Equivalent schedules with... 
16 | # 1 GPU: 17 | # BASE_LR: 0.0025 18 | # MAX_ITER: 60000 19 | # STEPS: [0, 30000, 40000] 20 | # 2 GPUs: 21 | # BASE_LR: 0.005 22 | # MAX_ITER: 30000 23 | # STEPS: [0, 15000, 20000] 24 | # 4 GPUs: 25 | # BASE_LR: 0.01 26 | # MAX_ITER: 15000 27 | # STEPS: [0, 7500, 10000] 28 | # 8 GPUs: 29 | # BASE_LR: 0.02 30 | # MAX_ITER: 7500 31 | # STEPS: [0, 3750, 5000] 32 | FPN: 33 | FPN_ON: True 34 | MULTILEVEL_ROIS: True 35 | MULTILEVEL_RPN: True 36 | FAST_RCNN: 37 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 7 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | TRAIN: 42 | SCALES: (500,) 43 | MAX_SIZE: 833 44 | BATCH_SIZE_PER_IM: 256 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | SCALE: 500 48 | MAX_SIZE: 833 49 | NMS: 0.5 50 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 51 | RPN_POST_NMS_TOP_N: 1000 52 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-101-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this 
is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 270000 13 | STEPS: [0, 210000, 250000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-101-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | 
MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-50-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 
28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 270000 13 | STEPS: [0, 210000, 250000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | USE_GN: True # Note: use GN on the FPN-specific layers 19 | RESNETS: 20 | IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/R-50-GN.pkl' 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | 
ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | LOAD_IMAGENET_PRETRAINED_WEIGHTS: False 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 270000 14 | STEPS: [0, 210000, 250000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | USE_GN: True # Note: use GN on the FPN-specific layers 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | 
SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.fpn_ResNet50_conv5_body 4 | FASTER_RCNN: True 5 | MASK_ON: True 6 | LOAD_IMAGENET_PRETRAINED_WEIGHTS: False 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 270000 14 | STEPS: [0, 210000, 250000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | USE_GN: True # Note: use GN on the FPN-specific layers 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform 23 | STEM_FUNC: basic_gn_stem # Note: this is a GN stem 24 | SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut 25 | USE_GN: True 26 | FAST_RCNN: 27 | ROI_BOX_HEAD: fast_rcnn_heads.roi_Xconv1fc_gn_head # Note: this is a Conv GN head 28 | ROI_XFORM_METHOD: RoIAlign 29 | ROI_XFORM_RESOLUTION: 7 30 | ROI_XFORM_SAMPLING_RATIO: 2 31 | MRCNN: 32 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head 33 | RESOLUTION: 28 # (output mask resolution) default 14 34 | ROI_XFORM_METHOD: RoIAlign 35 | ROI_XFORM_RESOLUTION: 14 # default 7 36 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 37 | DILATION: 1 # default 2 38 | CONV_INIT: MSRAFill # default GaussianFill 39 | TRAIN: 40 | SCALES: (800,) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | SCALE: 800 46 | 
MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/33823288584_1d21cf0a26_k-detectron-R101-FPN.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-detectron-R50-C4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/33823288584_1d21cf0a26_k-detectron-R50-C4.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/33823288584_1d21cf0a26_k-pydetectron-R101-FPN.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/33823288584_1d21cf0a26_k-pydetectron-R50-C4.jpg -------------------------------------------------------------------------------- /demo/33823288584_1d21cf0a26_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/33823288584_1d21cf0a26_k.jpg 
#!/bin/bash
# Convert every img*.pdf in a directory (selected with -d) to a JPEG of the
# same basename, using ImageMagick's `convert` at 300 DPI / quality 95.

pdfdir=''

while getopts 'd:' flag; do
  case "$flag" in
    d) pdfdir=$OPTARG ;;
    *) echo "usage: $0 -d <pdf-directory>" >&2; exit 1 ;;
  esac
done

# Glob directly instead of parsing `ls` output, and quote every expansion so
# paths containing spaces survive word splitting.
for pdf in "${pdfdir}"/img*.pdf; do
  # If nothing matched, the glob stays literal; skip it.
  [ -e "$pdf" ] || continue
  fname="${pdf%.*}"
  convert -density 300x300 -quality 95 "$pdf" "${fname}.jpg"
done
https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/e2e_mask_rcnn_R-50-C4/train_from_scratch_epoch1_bs4/img4.jpg -------------------------------------------------------------------------------- /demo/img1_keypoints-detectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/img1_keypoints-detectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img1_keypoints-pydetectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/img1_keypoints-pydetectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img2_keypoints-detectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/img2_keypoints-detectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/img2_keypoints-pydetectron-R50-FPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/img2_keypoints-pydetectron-R50-FPN.jpg -------------------------------------------------------------------------------- /demo/loss_cmp_of_e2e_faster_rcnn_R-50-FPN_1x.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/loss_cmp_of_e2e_faster_rcnn_R-50-FPN_1x.jpg 
-------------------------------------------------------------------------------- /demo/loss_cmp_of_e2e_keypoint_rcnn_R-50-FPN_1x.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/loss_cmp_of_e2e_keypoint_rcnn_R-50-FPN_1x.jpg -------------------------------------------------------------------------------- /demo/loss_cmp_of_e2e_mask_rcnn_R-50-FPN_1x.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/loss_cmp_of_e2e_mask_rcnn_R-50-FPN_1x.jpg -------------------------------------------------------------------------------- /demo/loss_e2e_keypoint_rcnn_R-50-FPN_1x_bs8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/loss_e2e_keypoint_rcnn_R-50-FPN_1x_bs8.jpg -------------------------------------------------------------------------------- /demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs16.jpg -------------------------------------------------------------------------------- /demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/loss_e2e_mask_rcnn_R-50-FPN_1x_bs6.jpg -------------------------------------------------------------------------------- /demo/sample_images/img1.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/sample_images/img1.jpg -------------------------------------------------------------------------------- /demo/sample_images/img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/sample_images/img2.jpg -------------------------------------------------------------------------------- /demo/sample_images/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/sample_images/img3.jpg -------------------------------------------------------------------------------- /demo/sample_images/img4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/sample_images/img4.jpg -------------------------------------------------------------------------------- /demo/sample_images_keypoints/img1_keypoints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/sample_images_keypoints/img1_keypoints.jpg -------------------------------------------------------------------------------- /demo/sample_images_keypoints/img2_keypoints.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/demo/sample_images_keypoints/img2_keypoints.jpg -------------------------------------------------------------------------------- 
function VOCopts = get_voc_opts(path)
% Load the PASCAL VOC devkit options struct (VOCopts) from a devkit rooted
% at `path`, by running the devkit's own VOCinit script.
%
% Side effects are carefully undone: the function cd's into the devkit and
% temporarily adds its VOCcode directory to the MATLAB path, then restores
% both the working directory and the path before returning (also on error).

tmp = pwd;           % remember caller's working directory
cd(path);            % VOCinit expects to run from the devkit root
try
  addpath('VOCcode');
  VOCinit;           % defines VOCopts in this workspace
catch
  % Clean up before reporting, so a failed call leaves no state behind.
  rmpath('VOCcode');
  cd(tmp);
  error(sprintf('VOCcode directory not found under %s', path));
end
rmpath('VOCcode');
cd(tmp);
function ap = xVOCap(rec,prec)
% From the PASCAL VOC 2011 devkit
%
% Compute average precision as the area under the precision/recall curve,
% with precision made monotonically non-increasing first (the VOC 2010+
% "all points" interpolation, as opposed to the older 11-point sampling).

% Pad the curve so it spans recall 0..1.
mrec=[0 ; rec ; 1];
mpre=[0 ; prec ; 0];
% Right-to-left max: replace each precision with the best precision at any
% recall >= this one (monotone envelope).
for i=numel(mpre)-1:-1:1
    mpre(i)=max(mpre(i),mpre(i+1));
end
% Indices where recall actually increases; sum rectangle areas there.
i=find(mrec(2:end)~=mrec(1:end-1))+1;
ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
def cityscapes_to_coco(cityscapes_id):
    """Map a (converted-json) Cityscapes category id to its COCO category id.

    Returns -1 when the Cityscapes class has no COCO counterpart, which
    callers treat as "randomly initialize these weights".
    """
    coco_id_of = {
        0: 0,   # background stays background
        1: 2,   # bicycle
        2: 3,   # car
        3: 1,   # person
        4: 7,   # train
        5: 8,   # truck
        6: 4,   # motorcycle
        7: 6,   # bus
        8: -1,  # rider: no COCO class (-1 means rand init)
    }
    return coco_id_of[cityscapes_id]
def cityscapes_to_coco_without_person_rider(cityscapes_id):
    """Map a Cityscapes category id to a COCO category id, ignoring humans.

    Like cityscapes_to_coco, but both 'person' and 'rider' map to -1
    (randomly initialized) instead of COCO's person class.
    """
    mapping = {
        0: 0,   # background
        1: 2,   # bicycle
        2: 3,   # car
        3: -1,  # person (ignore)
        4: 7,   # train
        5: 8,   # truck
        6: 4,   # motorcycle
        7: 6,   # bus
        8: -1,  # rider (ignore)
    }
    return mapping[cityscapes_id]
def convert_coco_blobs_to_cityscape_blobs(model_dict, convert_func=None):
    """Convert, in place, every class-indexed blob in model_dict['blobs']
    from COCO's 81-class layout to the Cityscapes 9-class layout.

    A blob is treated as class-indexed when its leading dimension is
    NUM_COCO_CLS (cls weights/biases) or 4 * NUM_COCO_CLS (per-class
    box-regression weights/biases); all other blobs are left untouched.

    Args:
        model_dict: dict with a 'blobs' mapping of blob name -> ndarray.
        convert_func: name of the id-mapping function in
            datasets.cityscapes.coco_to_cityscapes_id. Defaults to the
            command-line selection for backward compatibility.
    """
    if convert_func is None:
        # The original read the module-level `args` (set only under
        # __main__) directly, so importing this module and calling the
        # function raised NameError. Keep that as the fallback.
        convert_func = args.convert_func
    for k, v in model_dict['blobs'].items():
        if v.shape[0] in (NUM_COCO_CLS, 4 * NUM_COCO_CLS):
            # `v` is the blob itself; no need to re-fetch model_dict['blobs'][k].
            print(
                'Converting COCO blob {} with shape {}'.format(k, v.shape)
            )
            cs_blob = convert_coco_blob_to_cityscapes_blob(v, convert_func)
            print(' -> converted shape {}'.format(cs_blob.shape))
            # Re-assigning an existing key while iterating is safe (no resize).
            model_dict['blobs'][k] = cs_blob
def remove_momentum(model_dict):
    """Delete SGD momentum blobs ('*_momentum') in place; only the weights
    are needed to initialize fine-tuning."""
    # Snapshot the keys first: deleting entries while iterating the live
    # dict view raises RuntimeError on Python 3.
    for k in list(model_dict['blobs'].keys()):
        if k.endswith('_momentum'):
            del model_dict['blobs'][k]


def load_and_convert_coco_model(args):
    """Load the COCO-pretrained weights pickle named by args and convert
    its class-indexed blobs for Cityscapes fine-tuning.

    Returns the converted model dict (with momentum blobs removed).
    """
    # Detectron .pkl files are binary pickles: must open in 'rb'
    # ('r' fails to decode on Python 3).
    with open(args.coco_model_file_name, 'rb') as f:
        # NOTE(review): pickles written by Python 2 Detectron may also need
        # pickle.load(f, encoding='latin1') under Python 3 -- confirm.
        model_dict = pickle.load(f)
    remove_momentum(model_dict)
    convert_coco_blobs_to_cityscape_blobs(model_dict)
    return model_dict


if __name__ == '__main__':
    args = parse_args()
    print(args)
    assert os.path.exists(args.coco_model_file_name), \
        'Weights file does not exist'
    weights = load_and_convert_coco_model(args)

    # Binary mode ('wb') is required when pickling on Python 3.
    with open(args.out_file_name, 'wb') as f:
        pickle.dump(weights, f, protocol=pickle.HIGHEST_PROTOCOL)
    print('Wrote blobs to {}:'.format(args.out_file_name))
    print(sorted(weights['blobs'].keys()))
def evaluate_masks(
    json_dataset,
    all_boxes,
    all_segms,
    output_dir,
    use_salt=True,
    cleanup=False
):
    """Evaluate instance-segmentation results with the official Cityscapes
    evaluation scripts.

    Writes, under output_dir, one '<image>pred.txt' file per image plus one
    PNG mask per detection in 'results/', in the format the Cityscapes
    evalInstanceLevelSemanticLabeling script expects, then runs that script.

    Args:
        json_dataset: dataset object providing .name, .classes and
            .get_roidb() (same image order as all_boxes/all_segms).
        all_boxes: all_boxes[class_idx][image_idx] is an array of detections
            whose last column is the score (see boxes[k, -1] below).
        all_segms: all_segms[class_idx][image_idx] holds RLE-encoded masks
            decodable by pycocotools.mask.decode.
        output_dir: destination directory for the result files.
        use_salt: if True, append a uuid to res_file to avoid collisions.
        cleanup: accepted for API symmetry with the other evaluators but
            currently unused -- nothing is deleted afterwards.

    Returns:
        None (the Cityscapes script prints/saves its own metrics).
    """
    if cfg.CLUSTER.ON_CLUSTER:
        # On the cluster avoid saving these files in the job directory
        output_dir = '/tmp'
    # NOTE(review): res_file is computed (and salted) but never written to
    # below; it appears to be vestigial from the COCO-style evaluators.
    res_file = os.path.join(
        output_dir, 'segmentations_' + json_dataset.name + '_results')
    if use_salt:
        res_file += '_{}'.format(str(uuid.uuid4()))
    res_file += '.json'

    # Per-detection mask PNGs go here; pngname below is relative to output_dir.
    results_dir = os.path.join(output_dir, 'results')
    if not os.path.exists(results_dir):
        os.mkdir(results_dir)

    os.environ['CITYSCAPES_DATASET'] = DATASETS[json_dataset.name][RAW_DIR]
    os.environ['CITYSCAPES_RESULTS'] = output_dir

    # Load the Cityscapes eval script *after* setting the required env vars,
    # since the script reads their values into global variables (at load time).
    import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \
        as cityscapes_eval

    roidb = json_dataset.get_roidb()
    for i, entry in enumerate(roidb):
        im_name = entry['image']

        basename = os.path.splitext(os.path.basename(im_name))[0]
        txtname = os.path.join(output_dir, basename + 'pred.txt')
        with open(txtname, 'w') as fid_txt:
            if i % 10 == 0:
                logger.info('i: {}: {}'.format(i, basename))
            # Class index 0 is the background; skip it.
            for j in range(1, len(all_segms)):
                clss = json_dataset.classes[j]
                # Translate the dataset class name to the official
                # Cityscapes label id expected by the eval script.
                clss_id = cityscapes_eval.name2label[clss].id
                segms = all_segms[j][i]
                boxes = all_boxes[j][i]
                if segms == []:
                    continue
                # masks: H x W x num_detections binary array.
                masks = mask_util.decode(segms)

                for k in range(boxes.shape[0]):
                    score = boxes[k, -1]
                    mask = masks[:, :, k]
                    pngname = os.path.join(
                        'results',
                        basename + '_' + clss + '_{}.png'.format(k))
                    # write txt
                    fid_txt.write('{} {} {}\n'.format(pngname, clss_id, score))
                    # save mask
                    cv2.imwrite(os.path.join(output_dir, pngname), mask * 255)
    logger.info('Evaluating...')
    cityscapes_eval.main([])
    return None
def get_coco_dataset():
    """A dummy COCO dataset that includes only the 'classes' field."""
    class_names = (
        '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
        'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
        'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
        'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
        'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
        'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
        'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
        'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush',
    )
    # Map contiguous category index (0 = background) -> category name.
    dataset = AttrDict()
    dataset.classes = dict(enumerate(class_names))
    return dataset
#!/usr/bin/env bash
# Build all of the project's custom CUDA ops. For each op: compile its .cu
# kernel with nvcc into a prebuilt object file, then link that object into a
# cffi extension via the op's build.py. All `cd` commands are relative, so
# every section must end exactly one directory above its op folder for the
# following `cd ../../` to land back in lib/ -- keep that bookkeeping in mind
# when re-enabling the commented-out roi_align section below.

CUDA_PATH=/usr/local/cuda/

# Build the Cython/C extensions declared in setup.py in-place.
python setup.py build_ext --inplace
rm -rf build

# Choose cuda arch as you need
CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \
           -gencode arch=compute_35,code=sm_35 \
           -gencode arch=compute_50,code=sm_50 \
           -gencode arch=compute_52,code=sm_52 \
           -gencode arch=compute_60,code=sm_60 \
           -gencode arch=compute_61,code=sm_61 "
#          -gencode arch=compute_70,code=sm_70 "

# compile NMS
cd model/nms/src
echo "Compiling nms kernels by nvcc..."
nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \
     -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH

cd ../
python build.py

# compile roi_pooling
cd ../../
cd model/roi_pooling/src
echo "Compiling roi pooling kernels by nvcc..."
nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \
     -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
cd ../
python build.py

# # compile roi_align
# cd ../../
# cd model/roi_align/src
# echo "Compiling roi align kernels by nvcc..."
# nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \
#      -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
# cd ../
# python build.py

# compile roi_crop
cd ../../
cd model/roi_crop/src
echo "Compiling roi crop kernels by nvcc..."
nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \
     -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
cd ../
python build.py

# compile roi_align (based on Caffe2's implementation)
cd ../../
cd modeling/roi_xfrom/roi_align/src
echo "Compiling roi align kernels by nvcc..."
nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \
     -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
cd ../
python build.py
"""cffi build script for the GPU NMS extension (torch.utils.ffi era)."""
from __future__ import print_function
import os
import torch
from torch.utils.ffi import create_extension

#this_file = os.path.dirname(__file__)

# CUDA sources are only compiled in when a GPU build of torch is available;
# on a CPU-only install the extension is built empty.
sources = []
headers = []
defines = []
with_cuda = False

if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/nms_cuda.c']
    headers += ['src/nms_cuda.h']
    defines += [('WITH_CUDA', None)]
    with_cuda = True

this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)
# nms_cuda_kernel.cu.o is a prebuilt object produced by make.sh (nvcc) before
# this script runs; it is linked into the extension as-is.
extra_objects = ['src/nms_cuda_kernel.cu.o']
extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
print(extra_objects)

ffi = create_extension(
    '_ext.nms',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects
)

if __name__ == '__main__':
    ffi.build()
def nms(dets, thresh, force_cpu=False):
    """Dispatch to either CPU or GPU NMS implementations.

    dets: detections, one row per box with the score in the last column.
    thresh: overlap threshold above which boxes are suppressed.
    force_cpu: accepted for interface compatibility; only the pytorch GPU
    path is currently wired up.
    """
    # Nothing to suppress -- mirror the empty-result convention.
    if len(dets) == 0:
        return []
    # ---numpy version---
    # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
    # ---pytorch version---
    return nms_gpu(dets, thresh)
float nms_overlap_thresh) { 10 | 11 | nms_cuda_compute(THCudaIntTensor_data(state, keep_out), 12 | THCudaIntTensor_data(state, num_out), 13 | THCudaTensor_data(state, boxes_host), 14 | THCudaTensor_size(state, boxes_host, 0), 15 | THCudaTensor_size(state, boxes_host, 1), 16 | nms_overlap_thresh); 17 | 18 | return 1; 19 | } 20 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out, 2 | // THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh); 3 | 4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 5 | THCudaIntTensor *num_out, float nms_overlap_thresh); 6 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 6 | int boxes_dim, float nms_overlap_thresh); 7 | 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | -------------------------------------------------------------------------------- /lib/model/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- 
"""Auto-generated cffi shim: re-exports the compiled _roi_align library."""
from torch.utils.ffi import _wrap_function
from ._roi_align import lib as _lib, ffi as _ffi

__all__ = []
def _import_symbols(locals):
    # Re-export every symbol of the compiled cffi extension from this module.
    # NOTE: `locals` is actually this module's globals() dict (passed in the
    # call below), so assigning into it defines module-level names; the
    # parameter name shadows the builtin.
    for symbol in dir(_lib):
        fn = getattr(_lib, symbol)
        if callable(fn):
            # C functions are wrapped so they accept torch tensors directly.
            locals[symbol] = _wrap_function(fn, _ffi)
        else:
            # Non-callable exports (constants) pass through unchanged.
            locals[symbol] = fn
        __all__.append(symbol)

_import_symbols(locals())
# TODO use save_for_backward instead
class RoIAlignFunction(Function):
    """Legacy (instance-style) autograd Function wrapping the CUDA RoIAlign op.

    Pools each ROI from the feature map into a fixed
    (aligned_height, aligned_width) output. CPU execution is not implemented.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        # Output resolution per ROI, and the factor mapping ROI coordinates
        # (input-image scale) onto the feature-map scale.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)
        # Stashed by forward() for backward() -- pre-save_for_backward style,
        # which is why instances must not be reused across calls.
        self.rois = None
        self.feature_size = None

    def forward(self, features, rois):
        # features: (batch, channels, height, width) feature map.
        # rois: one row per ROI; the CUDA side requires 5 values per row --
        # presumably (batch_idx, x1, y1, x2, y2), confirm against the kernel.
        self.rois = rois
        self.feature_size = features.size()

        batch_size, num_channels, data_height, data_width = features.size()
        num_rois = rois.size(0)

        output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_()
        if features.is_cuda:
            # The CUDA op writes its result into the pre-allocated `output`.
            roi_align.roi_align_forward_cuda(self.aligned_height,
                                             self.aligned_width,
                                             self.spatial_scale, features,
                                             rois, output)
        else:
            raise NotImplementedError

        return output

    def backward(self, grad_output):
        # Backward requires a prior forward() (stashed state) and CUDA tensors.
        assert(self.feature_size is not None and grad_output.is_cuda)

        batch_size, num_channels, data_height, data_width = self.feature_size

        # Gradient w.r.t. the feature map, same shape as the forward input.
        grad_input = self.rois.new(batch_size, num_channels, data_height,
                                   data_width).zero_()
        roi_align.roi_align_backward_cuda(self.aligned_height,
                                          self.aligned_width,
                                          self.spatial_scale, grad_output,
                                          self.rois, grad_input)

        # print grad_input

        # No gradient for the ROIs themselves.
        return grad_input, None
class RoIAlignAvg(Module):
    """RoIAlign followed by a 2x2, stride-1 average pool.

    Aligns at one extra row and column, then average-pools, so the final
    output is exactly (aligned_height, aligned_width) per ROI.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignAvg, self).__init__()

        # Target pooled resolution and ROI-to-feature-map coordinate scale.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        # Sample at (h+1, w+1) so the 2x2/stride-1 average pool below brings
        # the result back down to (aligned_height, aligned_width).
        x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1,
                             self.spatial_scale)(features, rois)
        return avg_pool2d(x, kernel_size=2, stride=1)
/* RoIAlign forward pass entry point (CUDA).
 *
 * aligned_height/aligned_width: pooled output resolution per ROI.
 * spatial_scale: factor mapping ROI coordinates onto the feature-map scale.
 * features: (batch, channels, height, width) input feature map on the GPU.
 * rois: (num_rois, 5) ROI tensor; rows are presumably
 *       (batch_idx, x1, y1, x2, y2) -- confirm against the kernel.
 * output: pre-allocated result tensor the kernel writes into.
 *
 * Returns 1 on success; returns 0 silently (no error reported) when the ROI
 * tensor does not have exactly 5 columns.
 */
int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                           THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
{
    // Grab the input tensor
    float * data_flat = THCudaTensor_data(state, features);
    float * rois_flat = THCudaTensor_data(state, rois);

    float * output_flat = THCudaTensor_data(state, output);

    // Number of ROIs
    int num_rois = THCudaTensor_size(state, rois, 0);
    int size_rois = THCudaTensor_size(state, rois, 1);
    if (size_rois != 5)
    {
        return 0;
    }

    // data height
    int data_height = THCudaTensor_size(state, features, 2);
    // data width
    int data_width = THCudaTensor_size(state, features, 3);
    // Number of channels
    int num_channels = THCudaTensor_size(state, features, 1);

    // Launch on the current torch CUDA stream.
    cudaStream_t stream = THCState_getCurrentStream(state);

    ROIAlignForwardLaucher(
        data_flat, spatial_scale, num_rois, data_height,
        data_width, num_channels, aligned_height,
        aligned_width, rois_flat,
        output_flat, stream);

    return 1;
}
float spatial_scale, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width, num_channels, aligned_height, 72 | aligned_width, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define 
_ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /lib/model/roi_crop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_crop/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_crop/_ext/__init__.py 
"""Auto-generated cffi shim: re-exports the compiled _crop_resize library."""
from torch.utils.ffi import _wrap_function
from ._crop_resize import lib as _lib, ffi as _ffi

__all__ = []
def _import_symbols(locals):
    """Re-export every symbol of the compiled cffi extension on this module.

    `locals` is this module's globals() dict (passed in the call below), so
    assigning into it defines module-level names. Callable symbols are
    wrapped with _wrap_function so they accept torch tensors; non-callable
    exports (constants) pass through unchanged. The callable() guard matches
    the sibling _ext packages (nms, roi_align, roi_crop); the previous
    unguarded version also handed constants to _wrap_function, which breaks
    if the compiled library exports any non-function symbol.
    """
    for symbol in dir(_lib):
        fn = getattr(_lib, symbol)
        if callable(fn):
            locals[symbol] = _wrap_function(fn, _ffi)
        else:
            locals[symbol] = fn
        __all__.append(symbol)

_import_symbols(locals())
| if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_crop_cuda.c'] 16 | headers += ['src/roi_crop_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_crop', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_crop/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/crop_resize.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | from cffi import FFI 6 | ffi = FFI() 7 | 8 | class RoICropFunction(Function): 9 | def forward(self, input1, input2): 10 | self.input1 = input1 11 | self.input2 = input2 12 | self.device_c = ffi.new("int *") 13 | output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]) 14 | #print('decice %d' % torch.cuda.current_device()) 15 | if input1.is_cuda: 16 | self.device = torch.cuda.current_device() 17 | else: 18 | self.device = -1 19 | self.device_c[0] = self.device 20 | if not input1.is_cuda: 21 | roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output) 22 | else: 23 
class AffineGridGenFunction(Function):
    """Legacy autograd Function that turns batched 2x3 affine matrices into
    dense (H, W, 2) sampling grids (normalized coordinates in [-1, 1)).

    A fixed base grid of homogeneous coordinates is built once in __init__;
    forward() multiplies it by each affine matrix.
    """

    def __init__(self, height, width, lr=1):
        super(AffineGridGenFunction, self).__init__()
        self.lr = lr
        self.height, self.width = height, width
        # Base grid, shape (H, W, 3):
        #   channel 0 = row coordinate in [-1, 1), constant along columns
        #   channel 1 = col coordinate in [-1, 1), constant along rows
        #   channel 2 = 1 (homogeneous coordinate)
        self.grid = np.zeros([self.height, self.width, 3], dtype=np.float32)
        self.grid[:, :, 0] = np.expand_dims(
            np.repeat(np.expand_dims(np.arange(-1, 1, 2.0 / self.height), 0),
                      repeats=self.width, axis=0).T, 0)
        self.grid[:, :, 1] = np.expand_dims(
            np.repeat(np.expand_dims(np.arange(-1, 1, 2.0 / self.width), 0),
                      repeats=self.height, axis=0), 0)
        self.grid[:, :, 2] = np.ones([self.height, self.width])
        self.grid = torch.from_numpy(self.grid.astype(np.float32))

    def forward(self, input1):
        # input1: (batch, 2, 3) affine matrices.
        self.input1 = input1
        # Replicate the base grid per batch element; kept on self for backward.
        self.batchgrid = input1.new(
            torch.Size([input1.size(0)]) + self.grid.size()).zero_()
        for i in range(input1.size(0)):
            # Bug fix: the original called self.grid.astype(self.batchgrid[i]),
            # but self.grid is a torch.Tensor (from_numpy above) and has no
            # numpy-style astype() -- it raised AttributeError. A plain slice
            # assignment copies (and dtype-converts) the base grid in place.
            self.batchgrid[i] = self.grid

        # (B, H*W, 3) @ (B, 3, 2) -> (B, H*W, 2) -> (B, H, W, 2).
        # (The original recomputed this identical full-batch bmm once per
        # batch element; once is enough.)
        output = torch.bmm(
            self.batchgrid.view(-1, self.height * self.width, 3),
            torch.transpose(input1, 1, 2)
        ).view(-1, self.height, self.width, 2)

        return output

    def backward(self, grad_output):
        # Gradient w.r.t. the affine matrices: (B, 2, H*W) @ (B, H*W, 3).
        grad_input1 = self.input1.new(self.input1.size()).zero_()
        grad_input1 = torch.baddbmm(
            grad_input1,
            torch.transpose(grad_output.view(-1, self.height * self.width, 2),
                            1, 2),
            self.batchgrid.view(-1, self.height * self.width, 3))
        return grad_input1
class _RoICrop(Module):
    """nn.Module wrapper around RoICropFunction (bilinear ROI sampling).

    NOTE(review): the ``layout`` argument is accepted but never stored or
    used -- every instance behaves identically regardless of its value.
    """
    def __init__(self, layout = 'BHWD'):
        super(_RoICrop, self).__init__()
    def forward(self, input1, input2):
        # input1: feature maps; input2: sampling grid. Layouts are presumably
        # BHWD / BHW(YX) as stated in the C headers -- confirm at call sites.
        return RoICropFunction()(input1, input2)
BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 4 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 5 | 6 | 7 | 8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 9 | 10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 11 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 12 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda.h: -------------------------------------------------------------------------------- 1 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 2 | // we assume BHWD format in inputImages 3 | // we assume BHW(YX) format on grids 4 | 5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output); 6 | 7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput); 9 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | 6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc, 7 | /*output->size[2]*/int ow, 8 | /*output->size[1]*/int oh, 9 | /*output->size[0]*/int ob, 10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 11 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int 
gsh, int gsw, 16 | /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw, 17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 18 | 19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc, 20 | /*gradOutput->size[2]*/int gow, 21 | /*gradOutput->size[1]*/int goh, 22 | /*gradOutput->size[0]*/int gob, 23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 26 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 27 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 28 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 29 | /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw, 30 | /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw, 31 | /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw, 32 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 33 | 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_pooling/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/_ext/roi_pooling/__init__.py: 
__all__ = []


def _import_symbols(locals):
    """Mirror every public symbol of the compiled FFI library here.

    Callables are wrapped with ``_wrap_function`` so they accept torch
    tensors; plain attributes are copied through unchanged. Each name is
    also recorded in ``__all__`` so ``from ... import *`` re-exports it.
    """
    for symbol in dir(_lib):
        attr = getattr(_lib, symbol)
        wrapped = _wrap_function(attr, _ffi) if callable(attr) else attr
        locals[symbol] = wrapped
        __all__.append(symbol)


_import_symbols(locals())
class RoIPoolFunction(Function):
    """Max ROI pooling (Fast R-CNN style) backed by C/CUDA extensions.

    Legacy (instance-style) autograd Function: pooling geometry is passed
    to ``__init__`` and the forward inputs are cached on ``ctx``.
    """

    def __init__(ctx, pooled_height, pooled_width, spatial_scale):
        # Pool geometry; feature_size is filled in lazily by forward().
        ctx.pooled_width = pooled_width
        ctx.pooled_height = pooled_height
        ctx.spatial_scale = spatial_scale
        ctx.feature_size = None

    def forward(ctx, features, rois):
        ctx.feature_size = features.size()
        num_channels = ctx.feature_size[1]
        num_rois = rois.size(0)
        pooled_shape = (num_rois, num_channels, ctx.pooled_height, ctx.pooled_width)
        output = features.new(*pooled_shape).zero_()
        # argmax indices are consumed by the CUDA backward kernel.
        ctx.argmax = features.new(*pooled_shape).zero_().int()
        ctx.rois = rois
        if features.is_cuda:
            roi_pooling.roi_pooling_forward_cuda(
                ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                features, rois, output, ctx.argmax)
        else:
            # The CPU kernel expects NHWC layout.
            _features = features.permute(0, 2, 3, 1)
            roi_pooling.roi_pooling_forward(
                ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                _features, rois, output)

        return output

    def backward(ctx, grad_output):
        # Only the CUDA backward kernel exists.
        assert(ctx.feature_size is not None and grad_output.is_cuda)
        batch_size, num_channels, data_height, data_width = ctx.feature_size
        grad_input = grad_output.new(
            batch_size, num_channels, data_height, data_width).zero_()
        roi_pooling.roi_pooling_backward_cuda(
            ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
            grad_output, ctx.rois, grad_input, ctx.argmax)

        return grad_input, None
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/roi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import RoIPoolFunction 3 | 4 | 5 | class _RoIPooling(Module): 6 | def __init__(self, pooled_height, pooled_width, spatial_scale): 7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 15 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 
27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Set all element of the output tensor to -inf. 30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += 
output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output += pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 
21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = THCudaTensor_size(state, features, 0); 27 | // if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, 
data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /lib/model/utils/.gitignore: 
-------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/model/utils/__init__.py -------------------------------------------------------------------------------- /lib/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/modeling/__init__.py -------------------------------------------------------------------------------- /lib/modeling/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def generate_anchors(
    stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)
):
    """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    are centered on stride / 2, have (approximate) sqrt areas of the specified
    sizes, and aspect ratios as given.
    """
    # NOTE: this used np.float, which was deprecated in NumPy 1.20 and
    # removed in 1.24 (AttributeError). The builtin float is the documented
    # drop-in replacement and yields the same float64 dtype.
    return _generate_anchors(
        stride,
        np.array(sizes, dtype=float) / stride,
        np.array(aspect_ratios, dtype=float)
    )


def _generate_anchors(base_size, scales, aspect_ratios):
    """Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.
    """
    anchor = np.array([1, 1, base_size, base_size], dtype=float) - 1
    anchors = _ratio_enum(anchor, aspect_ratios)
    anchors = np.vstack(
        [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])]
    )
    return anchors


def _whctrs(anchor):
    """Return width, height, x center, and y center for an anchor (window)."""
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).
    """
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack(
        (
            x_ctr - 0.5 * (ws - 1),
            y_ctr - 0.5 * (hs - 1),
            x_ctr + 0.5 * (ws - 1),
            y_ctr + 0.5 * (hs - 1)
        )
    )
    return anchors


def _ratio_enum(anchor, ratios):
    """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    # Preserve area while changing aspect ratio; round to integral sizes
    # exactly as the original Faster R-CNN implementation does.
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


def _scale_enum(anchor, scales):
    """Enumerate a set of anchors for each scale wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors
class GenerateProposalLabelsOp(nn.Module):
    """Labels RPN proposals for joint RPN + Fast/Mask R-CNN training."""

    def __init__(self):
        super().__init__()

    def forward(self, rpn_rois, roidb, im_info):
        """Op for generating training labels for RPN proposals. This is used
        when training RPN jointly with Fast/Mask R-CNN (as in end-to-end
        Faster R-CNN training).

        blobs_in:
            - 'rpn_rois': 2D tensor of RPN proposals output by
              GenerateProposals
            - 'roidb': roidb entries that will be labeled
            - 'im_info': See GenerateProposals doc.

        blobs_out:
            - (variable set of blobs): whatever blobs the data loader says
              are required for training the model.
        """
        # Column 2 of im_info holds the per-image rescale factor.
        scales = im_info.data.numpy()[:, 2]

        blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
        # For historical consistency with the original Faster R-CNN
        # implementation we are *not* filtering crowd proposals.
        # This choice should be investigated in the future (it likely does
        # not matter).
        # Note: crowd_thresh=0 will ignore _filter_crowd_proposals
        json_dataset.add_proposals(roidb, rpn_rois, scales, crowd_thresh=0)
        blobs = dict((name, []) for name in blob_names)
        roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, scales, roidb)

        return blobs
# TODO use save_for_backward instead
class RoIAlignFunction(Function):
    """RoIAlign (Mask R-CNN) backed by the CUDA extension; CUDA only."""

    def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio):
        # Normalise argument types once; forward/backward reuse these.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)
        # Filled in by forward() for use in backward().
        self.rois = None
        self.feature_size = None

    def forward(self, features, rois):
        self.rois = rois
        self.feature_size = features.size()

        n_rois = rois.size(0)
        n_channels = features.size(1)
        output = features.new(
            n_rois, n_channels, self.aligned_height, self.aligned_width).zero_()

        if features.is_cuda:
            roi_align.roi_align_forward_cuda(
                self.aligned_height, self.aligned_width,
                self.spatial_scale, self.sampling_ratio,
                features, rois, output)
        else:
            # No CPU kernel was ever built for this extension.
            raise NotImplementedError

        return output

    def backward(self, grad_output):
        assert(self.feature_size is not None and grad_output.is_cuda)

        batch_size, n_channels, height, width = self.feature_size
        grad_input = self.rois.new(batch_size, n_channels, height, width).zero_()
        roi_align.roi_align_backward_cuda(
            self.aligned_height, self.aligned_width,
            self.spatial_scale, self.sampling_ratio,
            grad_output, self.rois, grad_input)

        return grad_input, None
class RoIAlign(Module):
    """Plain RoIAlign: pools each ROI to aligned_height x aligned_width."""

    def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio):
        super(RoIAlign, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)

    def forward(self, features, rois):
        fn = RoIAlignFunction(self.aligned_height, self.aligned_width,
                              self.spatial_scale, self.sampling_ratio)
        return fn(features, rois)


class RoIAlignAvg(Module):
    """RoIAlign to an (h+1, w+1) grid followed by 2x2 stride-1 avg pooling."""

    def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio):
        super(RoIAlignAvg, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)

    def forward(self, features, rois):
        fn = RoIAlignFunction(self.aligned_height + 1, self.aligned_width + 1,
                              self.spatial_scale, self.sampling_ratio)
        return avg_pool2d(fn(features, rois), kernel_size=2, stride=1)
class RoIAlignMax(Module):
    """RoIAlign onto a (h+1, w+1) grid followed by a 2x2 max pool."""

    def __init__(self, aligned_height, aligned_width, spatial_scale, sampling_ratio):
        super(RoIAlignMax, self).__init__()

        self.aligned_height = int(aligned_height)
        self.aligned_width = int(aligned_width)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)

    def forward(self, features, rois):
        # Pool one extra row/column, then max-pool down to the target size.
        align = RoIAlignFunction(self.aligned_height + 1, self.aligned_width + 1,
                                 self.spatial_scale, self.sampling_ratio)
        return max_pool2d(align(features, rois), kernel_size=2, stride=1)
/* Backward pass of RoIAlign on the GPU.
 *
 * Scatters `top_grad` (d_loss/d_output) into `bottom_grad`
 * (d_loss/d_features) for the given `rois` via the CUDA launcher.
 *
 * Returns 1 on success, 0 if `rois` does not have 5 columns per row.
 */
int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, int sampling_ratio,
                        THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
{
    // Grab the input tensor
    float * top_grad_flat = THCudaTensor_data(state, top_grad);
    float * rois_flat = THCudaTensor_data(state, rois);

    float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);

    // Number of ROIs
    int num_rois = THCudaTensor_size(state, rois, 0);
    int size_rois = THCudaTensor_size(state, rois, 1);
    if (size_rois != 5)
    {
        // Caller must treat 0 as failure; presumably each roi row is
        // [batch_idx, x1, y1, x2, y2] -- confirm against the kernel.
        return 0;
    }

    // batch size
    int batch_size = THCudaTensor_size(state, bottom_grad, 0);
    // data height
    int data_height = THCudaTensor_size(state, bottom_grad, 2);
    // data width
    int data_width = THCudaTensor_size(state, bottom_grad, 3);
    // Number of channels
    int num_channels = THCudaTensor_size(state, bottom_grad, 1);

    // Launch on the current THC stream so ordering with other torch ops holds.
    cudaStream_t stream = THCState_getCurrentStream(state);
    ROIAlignBackwardLaucher(
        top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
        data_width, num_channels, aligned_height,
        aligned_width, sampling_ratio, rois_flat,
        bottom_grad_flat, stream);

    return 1;
}
#ifndef _ROI_ALIGN_KERNEL
#define _ROI_ALIGN_KERNEL

#ifdef __cplusplus
extern "C" {
#endif

/* CUDA kernel: pool each roi from bottom_data into top_data. */
__global__ void ROIAlignForward(const int nthreads, const float* bottom_data,
                    const float spatial_scale, const int height, const int width,
                    const int channels, const int aligned_height, const int aligned_width, const int sampling_ratio,
                    const float* bottom_rois, float* top_data);

/* Host-side launcher for ROIAlignForward.
 * (Note: the "Laucher" misspelling is part of the existing API.) */
int ROIAlignForwardLaucher(
    const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
    const int width, const int channels, const int aligned_height,
    const int aligned_width, const int sampling_ratio, const float* bottom_rois,
    float* top_data, cudaStream_t stream);

/* CUDA kernel: scatter top_diff gradients back into bottom_diff. */
__global__ void ROIAlignBackward(const int nthreads, const float* top_diff,
                    const float spatial_scale, const int height, const int width,
                    const int channels, const int aligned_height, const int aligned_width, const int sampling_ratio,
                    float* bottom_diff, const float* bottom_rois);

/* Host-side launcher for ROIAlignBackward. */
int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
    const int height, const int width, const int channels, const int aligned_height,
    const int aligned_width, const int sampling_ratio, const float* bottom_rois,
    float* bottom_diff, cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
def group_norm(x, num_groups, weight=None, bias=None, eps=1e-5):
    """Group-normalize `x` over `num_groups` channel groups.

    Args:
        x: tensor of shape (N, C, ...); C must be divisible by num_groups.
        num_groups: number of channel groups to normalize over.
        weight, bias: optional per-channel affine parameters of shape (C,).
        eps: small constant added to the variance for numerical stability.

    Returns:
        Tensor of the same shape as `x`.
    """
    shape = x.shape
    batch, channels = shape[0], shape[1]
    assert channels % num_groups == 0, "input channel dimension must divisible by number of groups"

    # Normalize each (sample, group) slice to zero mean / unit variance.
    grouped = x.view(batch, num_groups, -1)
    mu = grouped.mean(-1, keepdim=True)
    # NOTE(review): torch's default here is the *unbiased* variance, whereas
    # nn.GroupNorm uses the biased one — confirm this is intended.
    sigma2 = grouped.var(-1, keepdim=True)
    normed = ((grouped - mu) / (sigma2 + eps).sqrt()).view(shape)

    if weight is None:
        return normed
    # Broadcast the per-channel affine over all trailing spatial dims.
    broadcast = (1, -1) + (1,) * (len(shape) - 2)
    return normed * weight.view(broadcast) + bias.view(broadcast)
class AffineChannel2d(nn.Module):
    """Channel-wise affine transform: y[:, c] = x[:, c] * weight[c] + bias[c]."""

    def __init__(self, num_features):
        super().__init__()
        self.num_features = num_features
        # Learnable per-channel scale and shift; scale starts uniform in
        # [0, 1), shift starts at zero.
        self.weight = nn.Parameter(torch.Tensor(num_features))
        self.bias = nn.Parameter(torch.Tensor(num_features))
        self.weight.data.uniform_()
        self.bias.data.zero_()

    def forward(self, x):
        # Reshape to (1, C, 1, 1) so the affine broadcasts over N, H, W.
        broadcast = (1, self.num_features, 1, 1)
        scale = self.weight.view(broadcast)
        shift = self.bias.view(broadcast)
        return x * scale + shift
class BilinearInterpolation2d(nn.Module):
    """Bilinear interpolation in space of scale.

    Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s:= up_scale

    Adapted from the CVPR'15 FCN code.
    See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py
    """

    def __init__(self, in_channels, out_channels, up_scale):
        super().__init__()
        assert in_channels == out_channels
        assert up_scale % 2 == 0, 'Scale should be even'
        # Fix: attribute was previously misspelled as `in_channes`.
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.up_scale = int(up_scale)
        self.padding = up_scale // 2

        def upsample_filt(size):
            """Return a (size, size) 2D bilinear interpolation kernel."""
            factor = (size + 1) // 2
            if size % 2 == 1:
                center = factor - 1
            else:
                center = factor - 0.5
            og = np.ogrid[:size, :size]
            return ((1 - abs(og[0] - center) / factor) *
                    (1 - abs(og[1] - center) / factor))

        kernel_size = up_scale * 2
        bil_filt = upsample_filt(kernel_size)

        # Diagonal kernel: channel i is upsampled from channel i only.
        kernel = np.zeros(
            (in_channels, out_channels, kernel_size, kernel_size), dtype=np.float32
        )
        kernel[range(in_channels), range(out_channels), :, :] = bil_filt

        # With k = 2s, stride = s, padding = s // 2 the output side is exactly
        # s times the input side: (H - 1) * s - 2 * (s // 2) + 2 * s = s * H.
        self.upconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size,
                                         stride=self.up_scale, padding=self.padding)

        # Freeze the weights: this layer is fixed interpolation, not learned.
        self.upconv.weight.data.copy_(torch.from_numpy(kernel))
        self.upconv.bias.data.fill_(0)
        self.upconv.weight.requires_grad = False
        self.upconv.bias.requires_grad = False

    def forward(self, x):
        """Upsample x of shape (N, K, H, W) to (N, K, s*H, s*W)."""
        return self.upconv(x)
class Broadcast(Function):
    """Autograd-aware broadcast of CUDA tensors to multiple GPUs.

    Backward reduce-adds the per-GPU gradients back onto the source device.
    """

    @staticmethod
    def forward(ctx, target_gpus, *inputs):
        if not all(input.is_cuda for input in inputs):
            raise TypeError('Broadcast function not implemented for CPU tensors')
        ctx.target_gpus = target_gpus
        if len(inputs) == 0:
            return tuple()
        ctx.num_inputs = len(inputs)
        ctx.input_device = inputs[0].get_device()
        # Coalesced copy: one transfer per device instead of one per tensor.
        outputs = comm.broadcast_coalesced(inputs, ctx.target_gpus)
        # Mark the copies of inputs that do not require grad so autograd
        # skips them during backward.
        non_differentiables = []
        for idx, input_requires_grad in enumerate(ctx.needs_input_grad[1:]):
            if not input_requires_grad:
                for output in outputs:
                    non_differentiables.append(output[idx])
        ctx.mark_non_differentiable(*non_differentiables)
        # Flatten the [per-device][per-input] nesting into a single tuple.
        return tuple([t for tensors in outputs for t in tensors])

    @staticmethod
    def backward(ctx, *grad_outputs):
        # Leading None matches the non-tensor `target_gpus` argument.
        return (None,) + ReduceAddCoalesced.apply(ctx.input_device, ctx.num_inputs, *grad_outputs)
class Gather(Function):
    """Concatenate per-GPU tensors onto one device along `dim`."""

    @staticmethod
    def forward(ctx, target_device, dim, *inputs):
        assert all(map(lambda i: i.is_cuda, inputs))
        ctx.target_device = target_device
        ctx.dim = dim
        # Remember where each input lived and its extent along `dim` so
        # backward can split the gradient back apart.
        ctx.input_gpus = tuple(map(lambda i: i.get_device(), inputs))
        ctx.input_sizes = tuple(map(lambda i: i.size(ctx.dim), inputs))
        return comm.gather(inputs, ctx.dim, ctx.target_device)

    @staticmethod
    def backward(ctx, grad_output):
        # Two Nones for (target_device, dim); Scatter undoes the gather.
        return (None, None) + Scatter.apply(ctx.input_gpus, ctx.input_sizes, ctx.dim, grad_output)


class Scatter(Function):
    """Split a tensor along `dim` into chunks placed on `target_gpus`."""

    @staticmethod
    def forward(ctx, target_gpus, chunk_sizes, dim, input):
        ctx.target_gpus = target_gpus
        ctx.chunk_sizes = chunk_sizes
        ctx.dim = dim
        # -1 encodes "input is on the CPU".
        ctx.input_device = input.get_device() if input.is_cuda else -1
        streams = None
        if ctx.input_device == -1:
            # Perform CPU to GPU copies in a background stream
            streams = [_get_stream(device) for device in ctx.target_gpus]
        outputs = comm.scatter(input, ctx.target_gpus, ctx.chunk_sizes, ctx.dim, streams)
        # Synchronize with the copy stream
        if streams is not None:
            for i, output in enumerate(outputs):
                with torch.cuda.device(ctx.target_gpus[i]):
                    main_stream = torch.cuda.current_stream()
                    main_stream.wait_stream(streams[i])
                    output.record_stream(main_stream)
        return outputs

    @staticmethod
    def backward(ctx, *grad_output):
        # Three Nones for (target_gpus, chunk_sizes, dim).
        return None, None, None, Gather.apply(ctx.input_device, ctx.dim, *grad_output)
def get_a_var(obj):
    """Return the first Variable found inside `obj`, searching recursively
    through lists, tuples and dict items; None if there is none."""
    if isinstance(obj, Variable):
        return obj

    if isinstance(obj, (list, tuple)):
        for candidate in map(get_a_var, obj):
            if isinstance(candidate, Variable):
                return candidate
    if isinstance(obj, dict):
        for candidate in map(get_a_var, obj.items()):
            if isinstance(candidate, Variable):
                return candidate
    return None
def replicate(network, devices):
    """Create one replica of `network` per device in `devices`.

    Parameters and buffers are broadcast to each device; module objects are
    shallow-copied and rewired so every replica owns its own container dicts.
    Returns a list with the root module of each replica.
    """
    from ._functions import Broadcast

    devices = tuple(devices)
    num_replicas = len(devices)

    # Broadcast all parameters in one coalesced, autograd-tracked transfer.
    params = list(network.parameters())
    param_indices = {param: idx for idx, param in enumerate(params)}
    param_copies = Broadcast.apply(devices, *params)
    if len(params) > 0:
        # Flat result -> one list of parameter copies per replica.
        param_copies = [param_copies[i:i + len(params)]
                        for i in range(0, len(param_copies), len(params))]

    # Buffers are broadcast without autograd tracking.
    # NOTE(review): `_all_buffers()` is a private API of older torch
    # releases — confirm it exists on the targeted version.
    buffers = list(network._all_buffers())
    buffer_indices = {buf: idx for idx, buf in enumerate(buffers)}
    buffer_copies = comm.broadcast_coalesced(buffers, devices)

    modules = list(network.modules())
    module_copies = [[] for device in devices]
    module_indices = {}

    # Shallow-copy every module once per replica; the container dicts are
    # copied so the rewiring below does not mutate the original network.
    for i, module in enumerate(modules):
        module_indices[module] = i
        for j in range(num_replicas):
            replica = module.__new__(type(module))
            replica.__dict__ = module.__dict__.copy()
            replica._parameters = replica._parameters.copy()
            replica._buffers = replica._buffers.copy()
            replica._modules = replica._modules.copy()
            module_copies[j].append(replica)

    # Rewire each replica's children, parameters and buffers to point at the
    # copies that belong to its own device.
    for i, module in enumerate(modules):
        for key, child in module._modules.items():
            if child is None:
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._modules[key] = None
            else:
                module_idx = module_indices[child]
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._modules[key] = module_copies[j][module_idx]
        for key, param in module._parameters.items():
            if param is None:
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._parameters[key] = None
            else:
                param_idx = param_indices[param]
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._parameters[key] = param_copies[j][param_idx]
        for key, buf in module._buffers.items():
            if buf is None:
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._buffers[key] = None
            else:
                buffer_idx = buffer_indices[buf]
                for j in range(num_replicas):
                    replica = module_copies[j][i]
                    replica._buffers[key] = buffer_copies[j][buffer_idx]

    # Element 0 of each per-replica list is the root module.
    return [module_copies[j][0] for j in range(num_replicas)]
def scatter_kwargs(inputs, kwargs, target_gpus, dim=0):
    r"""Scatter with support for kwargs dictionary"""
    args_per_gpu = scatter(inputs, target_gpus, dim) if inputs else []
    kwargs_per_gpu = scatter(kwargs, target_gpus, dim) if kwargs else []
    # Pad the shorter list so every device gets an (args, kwargs) pair.
    while len(args_per_gpu) < len(kwargs_per_gpu):
        args_per_gpu.append(())
    while len(kwargs_per_gpu) < len(args_per_gpu):
        kwargs_per_gpu.append({})
    return tuple(args_per_gpu), tuple(kwargs_per_gpu)
58 | """ 59 | error_msg = "outputs must contain tensors, numbers, dicts or lists; found {}" 60 | 61 | def gather_map(outputs): 62 | out = outputs[0] 63 | elem_type = type(out) 64 | if isinstance(out, Variable): 65 | return Gather.apply(target_device, dim, *outputs) 66 | if out is None: 67 | return None 68 | if isinstance(out, collections.Sequence): 69 | return type(out)(map(gather_map, zip(*outputs))) 70 | elif isinstance(out, collections.Mapping): 71 | return {key: gather_map([d[key] for d in outputs]) for key in out} 72 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ 73 | and elem_type.__name__ != 'string_': 74 | elem = out 75 | if elem_type.__name__ == 'ndarray': 76 | # array of string classes and object 77 | if re.search('[SaUO]', elem.dtype.str) is not None: 78 | raise TypeError(error_msg.format(elem.dtype)) 79 | 80 | return Variable(torch.from_numpy(np.concatenate(outputs, dim))) 81 | if elem.shape == (): # scalars 82 | py_type = float if elem.dtype.name.startswith('float') else int 83 | return Variable(numpy_type_map[elem.dtype.name](list(map(py_type, outputs)))) 84 | elif isinstance(out, int_classes): 85 | return Variable(torch.LongTensor(outputs)) 86 | elif isinstance(out, float): 87 | return Variable(torch.DoubleTensor(outputs)) 88 | elif isinstance(out, string_classes): 89 | return outputs 90 | 91 | raise TypeError((error_msg.format(elem_type))) 92 | 93 | # Recursive function calls like this create reference cycles. 94 | # Setting the function to None clears the refcycle. 
95 | try: 96 | return gather_map(outputs) 97 | finally: 98 | gather_map = None 99 | -------------------------------------------------------------------------------- /lib/roi_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/roi_data/__init__.py -------------------------------------------------------------------------------- /lib/roi_data/data_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Common utility functions for RPN and RetinaNet minibtach blobs preparation. 17 | """ 18 | 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | from __future__ import unicode_literals 23 | 24 | from collections import namedtuple 25 | import logging 26 | import numpy as np 27 | import threading 28 | 29 | from core.config import cfg 30 | from modeling.generate_anchors import generate_anchors 31 | import utils.boxes as box_utils 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | # octave and aspect fields are only used on RetinaNet. 
Octave corresponds to the 37 | # scale of the anchor and aspect denotes which aspect ratio is used in the range 38 | # of aspect ratios 39 | FieldOfAnchors = namedtuple( 40 | 'FieldOfAnchors', [ 41 | 'field_of_anchors', 'num_cell_anchors', 'stride', 'field_size', 42 | 'octave', 'aspect' 43 | ] 44 | ) 45 | 46 | # Cache for memoizing _get_field_of_anchors 47 | _threadlocal_foa = threading.local() 48 | 49 | 50 | def get_field_of_anchors( 51 | stride, anchor_sizes, anchor_aspect_ratios, octave=None, aspect=None 52 | ): 53 | global _threadlocal_foa 54 | if not hasattr(_threadlocal_foa, 'cache'): 55 | _threadlocal_foa.cache = {} 56 | 57 | cache_key = str(stride) + str(anchor_sizes) + str(anchor_aspect_ratios) 58 | if cache_key in _threadlocal_foa.cache: 59 | return _threadlocal_foa.cache[cache_key] 60 | 61 | # Anchors at a single feature cell 62 | cell_anchors = generate_anchors( 63 | stride=stride, sizes=anchor_sizes, aspect_ratios=anchor_aspect_ratios 64 | ) 65 | num_cell_anchors = cell_anchors.shape[0] 66 | 67 | # Generate canonical proposals from shifted anchors 68 | # Enumerate all shifted positions on the (H, W) grid 69 | fpn_max_size = cfg.FPN.COARSEST_STRIDE * np.ceil( 70 | cfg.TRAIN.MAX_SIZE / float(cfg.FPN.COARSEST_STRIDE) 71 | ) 72 | field_size = int(np.ceil(fpn_max_size / float(stride))) 73 | shifts = np.arange(0, field_size) * stride 74 | shift_x, shift_y = np.meshgrid(shifts, shifts) 75 | shift_x = shift_x.ravel() 76 | shift_y = shift_y.ravel() 77 | shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose() 78 | 79 | # Broacast anchors over shifts to enumerate all anchors at all positions 80 | # in the (H, W) grid: 81 | # - add A cell anchors of shape (1, A, 4) to 82 | # - K shifts of shape (K, 1, 4) to get 83 | # - all shifted anchors of shape (K, A, 4) 84 | # - reshape to (K*A, 4) shifted anchors 85 | A = num_cell_anchors 86 | K = shifts.shape[0] 87 | field_of_anchors = ( 88 | cell_anchors.reshape((1, A, 4)) + 89 | shifts.reshape((1, K, 
def unmap(data, count, inds, fill=0):
    """Scatter the rows of `data` back into an array of `count` rows.

    Rows listed in `inds` receive the corresponding row of `data`; every
    other row is set to `fill`. If nothing was subsampled the input is
    returned unchanged.
    """
    if count == len(inds):
        # Identity mapping: no subset was taken.
        return data

    out = np.full((count, ) + data.shape[1:], fill, dtype=data.dtype)
    if data.ndim == 1:
        out[inds] = data
    else:
        out[inds, :] = data
    return out
11 | """ 12 | # data blob: holds a batch of N images, each with 3 channels 13 | blob_names = ['data'] 14 | if cfg.RPN.RPN_ON: 15 | # RPN-only or end-to-end Faster R-CNN 16 | blob_names += roi_data.rpn.get_rpn_blob_names(is_training=is_training) 17 | elif cfg.RETINANET.RETINANET_ON: 18 | raise NotImplementedError 19 | else: 20 | # Fast R-CNN like models trained on precomputed proposals 21 | blob_names += roi_data.fast_rcnn.get_fast_rcnn_blob_names( 22 | is_training=is_training 23 | ) 24 | return blob_names 25 | 26 | 27 | def get_minibatch(roidb): 28 | """Given a roidb, construct a minibatch sampled from it.""" 29 | # We collect blobs from each image onto a list and then concat them into a 30 | # single tensor, hence we initialize each blob to an empty list 31 | blobs = {k: [] for k in get_minibatch_blob_names()} 32 | 33 | # Get the input image blob 34 | im_blob, im_scales = _get_image_blob(roidb) 35 | blobs['data'] = im_blob 36 | if cfg.RPN.RPN_ON: 37 | # RPN-only or end-to-end Faster/Mask R-CNN 38 | valid = roi_data.rpn.add_rpn_blobs(blobs, im_scales, roidb) 39 | elif cfg.RETINANET.RETINANET_ON: 40 | raise NotImplementedError 41 | else: 42 | # Fast R-CNN like models trained on precomputed proposals 43 | valid = roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb) 44 | return blobs, valid 45 | 46 | 47 | def _get_image_blob(roidb): 48 | """Builds an input blob from the images in the roidb at the specified 49 | scales. 
50 | """ 51 | num_images = len(roidb) 52 | # Sample random scales to use for each image in this batch 53 | scale_inds = np.random.randint( 54 | 0, high=len(cfg.TRAIN.SCALES), size=num_images) 55 | processed_ims = [] 56 | im_scales = [] 57 | for i in range(num_images): 58 | im = cv2.imread(roidb[i]['image']) 59 | assert im is not None, \ 60 | 'Failed to read image \'{}\''.format(roidb[i]['image']) 61 | # If NOT using opencv to read in images, uncomment following lines 62 | # if len(im.shape) == 2: 63 | # im = im[:, :, np.newaxis] 64 | # im = np.concatenate((im, im, im), axis=2) 65 | # # flip the channel, since the original one using cv2 66 | # # rgb -> bgr 67 | # im = im[:, :, ::-1] 68 | if roidb[i]['flipped']: 69 | im = im[:, ::-1, :] 70 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 71 | im, im_scale = blob_utils.prep_im_for_blob( 72 | im, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE) 73 | im_scales.append(im_scale[0]) 74 | processed_ims.append(im[0]) 75 | 76 | # Create a blob to hold the input images [n, c, h, w] 77 | blob = blob_utils.im_list_to_blob(processed_ims) 78 | 79 | return blob, im_scales 80 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from __future__ import print_function 9 | 10 | from Cython.Build import cythonize 11 | from Cython.Distutils import build_ext 12 | from setuptools import Extension 13 | from setuptools import setup 14 | 15 | import numpy as np 16 | 17 | 18 | # Obtain the numpy include directory. This logic works across numpy versions. 
19 | try: 20 | numpy_include = np.get_include() 21 | except AttributeError: 22 | numpy_include = np.get_numpy_include() 23 | 24 | 25 | ext_modules = [ 26 | Extension( 27 | name='utils.cython_bbox', 28 | sources=['utils/cython_bbox.pyx'], 29 | extra_compile_args=['-Wno-cpp'], 30 | include_dirs=[numpy_include] 31 | ), 32 | Extension( 33 | name='utils.cython_nms', 34 | sources=['utils/cython_nms.pyx'], 35 | extra_compile_args=['-Wno-cpp'], 36 | include_dirs=[numpy_include] 37 | ) 38 | ] 39 | 40 | setup( 41 | name='mask_rcnn', 42 | ext_modules=cythonize(ext_modules) 43 | ) 44 | 45 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roytseng-tw/Detectron.pytorch/1b1c4ba58428b7277a45b0dce6cc1bce3744b86a/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/collections.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | ############################################################################## 15 | 16 | """A simple attribute dictionary used for representing configuration options.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | 24 | class AttrDict(dict): 25 | 26 | IMMUTABLE = '__immutable__' 27 | 28 | def __init__(self, *args, **kwargs): 29 | super(AttrDict, self).__init__(*args, **kwargs) 30 | self.__dict__[AttrDict.IMMUTABLE] = False 31 | 32 | def __getattr__(self, name): 33 | if name in self.__dict__: 34 | return self.__dict__[name] 35 | elif name in self: 36 | return self[name] 37 | else: 38 | raise AttributeError(name) 39 | 40 | def __setattr__(self, name, value): 41 | if not self.__dict__[AttrDict.IMMUTABLE]: 42 | if name in self.__dict__: 43 | self.__dict__[name] = value 44 | else: 45 | self[name] = value 46 | else: 47 | raise AttributeError( 48 | 'Attempted to set "{}" to "{}", but AttrDict is immutable'. 49 | format(name, value) 50 | ) 51 | 52 | def immutable(self, is_immutable): 53 | """Set immutability to is_immutable and recursively apply the setting 54 | to all nested AttrDicts. 55 | """ 56 | self.__dict__[AttrDict.IMMUTABLE] = is_immutable 57 | # Recursively set immutable state 58 | for v in self.__dict__.values(): 59 | if isinstance(v, AttrDict): 60 | v.immutable(is_immutable) 61 | for v in self.values(): 62 | if isinstance(v, AttrDict): 63 | v.immutable(is_immutable) 64 | 65 | def is_immutable(self): 66 | return self.__dict__[AttrDict.IMMUTABLE] 67 | -------------------------------------------------------------------------------- /lib/utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 
def colormap(rgb=False):
    """Return a fixed palette of 79 visually distinct colors.

    Args:
        rgb: if True, rows are (R, G, B); otherwise (B, G, R), which is
            OpenCV's channel order and the default for drawing with cv2.

    Returns:
        float32 ndarray of shape (79, 3) with channel values in [0, 255].
    """
    # Flat list of normalized RGB triples (one color per 3 values).
    color_list = np.array(
        [
            0.000, 0.447, 0.741,
            0.850, 0.325, 0.098,
            0.929, 0.694, 0.125,
            0.494, 0.184, 0.556,
            0.466, 0.674, 0.188,
            0.301, 0.745, 0.933,
            0.635, 0.078, 0.184,
            0.300, 0.300, 0.300,
            0.600, 0.600, 0.600,
            1.000, 0.000, 0.000,
            1.000, 0.500, 0.000,
            0.749, 0.749, 0.000,
            0.000, 1.000, 0.000,
            0.000, 0.000, 1.000,
            0.667, 0.000, 1.000,
            0.333, 0.333, 0.000,
            0.333, 0.667, 0.000,
            0.333, 1.000, 0.000,
            0.667, 0.333, 0.000,
            0.667, 0.667, 0.000,
            0.667, 1.000, 0.000,
            1.000, 0.333, 0.000,
            1.000, 0.667, 0.000,
            1.000, 1.000, 0.000,
            0.000, 0.333, 0.500,
            0.000, 0.667, 0.500,
            0.000, 1.000, 0.500,
            0.333, 0.000, 0.500,
            0.333, 0.333, 0.500,
            0.333, 0.667, 0.500,
            0.333, 1.000, 0.500,
            0.667, 0.000, 0.500,
            0.667, 0.333, 0.500,
            0.667, 0.667, 0.500,
            0.667, 1.000, 0.500,
            1.000, 0.000, 0.500,
            1.000, 0.333, 0.500,
            1.000, 0.667, 0.500,
            1.000, 1.000, 0.500,
            0.000, 0.333, 1.000,
            0.000, 0.667, 1.000,
            0.000, 1.000, 1.000,
            0.333, 0.000, 1.000,
            0.333, 0.333, 1.000,
            0.333, 0.667, 1.000,
            0.333, 1.000, 1.000,
            0.667, 0.000, 1.000,
            0.667, 0.333, 1.000,
            0.667, 0.667, 1.000,
            0.667, 1.000, 1.000,
            1.000, 0.000, 1.000,
            1.000, 0.333, 1.000,
            1.000, 0.667, 1.000,
            0.167, 0.000, 0.000,
            0.333, 0.000, 0.000,
            0.500, 0.000, 0.000,
            0.667, 0.000, 0.000,
            0.833, 0.000, 0.000,
            1.000, 0.000, 0.000,
            0.000, 0.167, 0.000,
            0.000, 0.333, 0.000,
            0.000, 0.500, 0.000,
            0.000, 0.667, 0.000,
            0.000, 0.833, 0.000,
            0.000, 1.000, 0.000,
            0.000, 0.000, 0.167,
            0.000, 0.000, 0.333,
            0.000, 0.000, 0.500,
            0.000, 0.000, 0.667,
            0.000, 0.000, 0.833,
            0.000, 0.000, 1.000,
            0.000, 0.000, 0.000,
            0.143, 0.143, 0.143,
            0.286, 0.286, 0.286,
            0.429, 0.429, 0.429,
            0.571, 0.571, 0.571,
            0.714, 0.714, 0.714,
            0.857, 0.857, 0.857,
            1.000, 1.000, 1.000
        ]
    ).astype(np.float32)
    # Reshape the flat list into (N, 3) rows and scale to [0, 255].
    color_list = color_list.reshape((-1, 3)) * 255
    if not rgb:
        # Reverse the channel axis: RGB -> BGR for use with cv2 drawing.
        color_list = color_list[:, ::-1]
    return color_list
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_overlaps( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ua = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) + 70 | box_area - iw * ih 71 | ) 72 | overlaps[n, k] = iw * ih / ua 73 | return overlaps 74 | -------------------------------------------------------------------------------- 
/lib/utils/detectron_weight_helper.py: -------------------------------------------------------------------------------- 1 | """Helper functions for loading pretrained weights from Detectron pickle files 2 | """ 3 | 4 | import pickle 5 | import re 6 | import torch 7 | 8 | 9 | def load_detectron_weight(net, detectron_weight_file): 10 | name_mapping, orphan_in_detectron = net.detectron_weight_mapping 11 | 12 | with open(detectron_weight_file, 'rb') as fp: 13 | src_blobs = pickle.load(fp, encoding='latin1') 14 | if 'blobs' in src_blobs: 15 | src_blobs = src_blobs['blobs'] 16 | 17 | params = net.state_dict() 18 | for p_name, p_tensor in params.items(): 19 | d_name = name_mapping[p_name] 20 | if isinstance(d_name, str): # maybe str, None or True 21 | p_tensor.copy_(torch.Tensor(src_blobs[d_name])) 22 | 23 | 24 | def resnet_weights_name_pattern(): 25 | pattern = re.compile(r"conv1_w|conv1_gn_[sb]|res_conv1_.+|res\d+_\d+_.+") 26 | return pattern 27 | 28 | 29 | if __name__ == '__main__': 30 | """Testing""" 31 | from pprint import pprint 32 | import sys 33 | sys.path.insert(0, '..') 34 | from modeling.model_builder import Generalized_RCNN 35 | from core.config import cfg, cfg_from_file 36 | 37 | cfg.MODEL.NUM_CLASSES = 81 38 | cfg_from_file('../../cfgs/res50_mask.yml') 39 | net = Generalized_RCNN() 40 | 41 | # pprint(list(net.state_dict().keys()), width=1) 42 | 43 | mapping, orphans = net.detectron_weight_mapping 44 | state_dict = net.state_dict() 45 | 46 | for k in mapping.keys(): 47 | assert k in state_dict, '%s' % k 48 | 49 | rest = set(state_dict.keys()) - set(mapping.keys()) 50 | assert len(rest) == 0 51 | -------------------------------------------------------------------------------- /lib/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Environment helper functions.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | import os 24 | import sys 25 | 26 | # Default value of the CMake install prefix 27 | _CMAKE_INSTALL_PREFIX = '/usr/local' 28 | 29 | 30 | def get_runtime_dir(): 31 | """Retrieve the path to the runtime directory.""" 32 | return os.getcwd() 33 | 34 | 35 | def get_py_bin_ext(): 36 | """Retrieve python binary extension.""" 37 | return '.py' 38 | 39 | 40 | def set_up_matplotlib(): 41 | """Set matplotlib up.""" 42 | import matplotlib 43 | # Use a non-interactive backend 44 | matplotlib.use('Agg') 45 | 46 | 47 | def exit_on_error(): 48 | """Exit from a detectron tool when there's an error.""" 49 | sys.exit(1) 50 | -------------------------------------------------------------------------------- /lib/utils/fpn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import utils.boxes as box_utils 4 | from core.config import cfg 5 | 6 | 7 | # ---------------------------------------------------------------------------- # 8 | # Helper functions for working with multilevel FPN RoIs 9 | # 
---------------------------------------------------------------------------- # 10 | 11 | def map_rois_to_fpn_levels(rois, k_min, k_max): 12 | """Determine which FPN level each RoI in a set of RoIs should map to based 13 | on the heuristic in the FPN paper. 14 | """ 15 | # Compute level ids 16 | areas, neg_idx = box_utils.boxes_area(rois) 17 | areas[neg_idx] = 0 # np.sqrt will remove the entries with negative value 18 | s = np.sqrt(areas) 19 | s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224 20 | lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4 21 | 22 | # Eqn.(1) in FPN paper 23 | target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6)) 24 | target_lvls = np.clip(target_lvls, k_min, k_max) 25 | 26 | # Mark to discard negative area roi. See utils.fpn.add_multilevel_roi_blobs 27 | # target_lvls[neg_idx] = -1 28 | return target_lvls 29 | 30 | 31 | def add_multilevel_roi_blobs( 32 | blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max 33 | ): 34 | """Add RoI blobs for multiple FPN levels to the blobs dict. 35 | 36 | blobs: a dict mapping from blob name to numpy ndarray 37 | blob_prefix: name prefix to use for the FPN blobs 38 | rois: the source rois as a 2D numpy array of shape (N, 5) where each row is 39 | an roi and the columns encode (batch_idx, x1, y1, x2, y2) 40 | target_lvls: numpy array of shape (N, ) indicating which FPN level each roi 41 | in rois should be assigned to. -1 means correspoind roi should be discarded. 
42 | lvl_min: the finest (highest resolution) FPN level (e.g., 2) 43 | lvl_max: the coarest (lowest resolution) FPN level (e.g., 6) 44 | """ 45 | rois_idx_order = np.empty((0, )) 46 | rois_stacked = np.zeros((0, 5), dtype=np.float32) # for assert 47 | # target_lvls = remove_negative_area_roi_blobs(blobs, blob_prefix, rois, target_lvls) 48 | for lvl in range(lvl_min, lvl_max + 1): 49 | idx_lvl = np.where(target_lvls == lvl)[0] 50 | blobs[blob_prefix + '_fpn' + str(lvl)] = rois[idx_lvl, :] 51 | rois_idx_order = np.concatenate((rois_idx_order, idx_lvl)) 52 | rois_stacked = np.vstack( 53 | [rois_stacked, blobs[blob_prefix + '_fpn' + str(lvl)]] 54 | ) 55 | rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, copy=False) 56 | blobs[blob_prefix + '_idx_restore_int32'] = rois_idx_restore 57 | # Sanity check that restore order is correct 58 | assert (rois_stacked[rois_idx_restore] == rois).all() 59 | 60 | 61 | def remove_negative_area_roi_blobs(blobs, blob_prefix, rois, target_lvls): 62 | """ Delete roi entries that have negative area (Uncompleted) """ 63 | idx_neg = np.where(target_lvls == -1)[0] 64 | rois = np.delete(rois, idx_neg, axis=0) 65 | blobs[blob_prefix] = rois 66 | target_lvls = np.delete(target_lvls, idx_neg, axis=0) 67 | #TODO: other blobs in faster_rcnn.get_fast_rcnn_blob_names should also be modified 68 | return target_lvls 69 | -------------------------------------------------------------------------------- /lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Image helper functions.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | import cv2 24 | import numpy as np 25 | 26 | 27 | def aspect_ratio_rel(im, aspect_ratio): 28 | """Performs width-relative aspect ratio transformation.""" 29 | im_h, im_w = im.shape[:2] 30 | im_ar_w = int(round(aspect_ratio * im_w)) 31 | im_ar = cv2.resize(im, dsize=(im_ar_w, im_h)) 32 | return im_ar 33 | 34 | 35 | def aspect_ratio_abs(im, aspect_ratio): 36 | """Performs absolute aspect ratio transformation.""" 37 | im_h, im_w = im.shape[:2] 38 | im_area = im_h * im_w 39 | 40 | im_ar_w = np.sqrt(im_area * aspect_ratio) 41 | im_ar_h = np.sqrt(im_area / aspect_ratio) 42 | assert np.isclose(im_ar_w / im_ar_h, aspect_ratio) 43 | 44 | im_ar = cv2.resize(im, dsize=(int(im_ar_w), int(im_ar_h))) 45 | return im_ar 46 | -------------------------------------------------------------------------------- /lib/utils/logging.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Utilities for logging.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | from collections import deque 24 | from email.mime.text import MIMEText 25 | import json 26 | import logging 27 | import numpy as np 28 | import smtplib 29 | import sys 30 | 31 | from core.config import cfg 32 | 33 | # Print lower precision floating point values than default FLOAT_REPR 34 | # Note! 
Has no use for json encode with C speedups 35 | json.encoder.FLOAT_REPR = lambda o: format(o, '.6f') 36 | 37 | 38 | def log_json_stats(stats, sort_keys=True): 39 | print('json_stats: {:s}'.format(json.dumps(stats, sort_keys=sort_keys))) 40 | 41 | 42 | def log_stats(stats, misc_args): 43 | """Log training statistics to terminal""" 44 | if hasattr(misc_args, 'epoch'): 45 | lines = "[%s][%s][Epoch %d][Iter %d / %d]\n" % ( 46 | misc_args.run_name, misc_args.cfg_filename, 47 | misc_args.epoch, misc_args.step, misc_args.iters_per_epoch) 48 | else: 49 | lines = "[%s][%s][Step %d / %d]\n" % ( 50 | misc_args.run_name, misc_args.cfg_filename, stats['iter'], cfg.SOLVER.MAX_ITER) 51 | 52 | lines += "\t\tloss: %.6f, lr: %.6f time: %.6f, eta: %s\n" % ( 53 | stats['loss'], stats['lr'], stats['time'], stats['eta'] 54 | ) 55 | if stats['metrics']: 56 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['metrics'].items()) + "\n" 57 | if stats['head_losses']: 58 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['head_losses'].items()) + "\n" 59 | if cfg.RPN.RPN_ON: 60 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['rpn_losses'].items()) + "\n" 61 | if cfg.FPN.FPN_ON: 62 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['rpn_fpn_cls_losses'].items()) + "\n" 63 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['rpn_fpn_bbox_losses'].items()) + "\n" 64 | print(lines[:-1]) # remove last new line 65 | 66 | 67 | class SmoothedValue(object): 68 | """Track a series of values and provide access to smoothed values over a 69 | window or the global series average. 
70 | """ 71 | 72 | def __init__(self, window_size): 73 | self.deque = deque(maxlen=window_size) 74 | self.series = [] 75 | self.total = 0.0 76 | self.count = 0 77 | 78 | def AddValue(self, value): 79 | self.deque.append(value) 80 | self.series.append(value) 81 | self.count += 1 82 | self.total += value 83 | 84 | def GetMedianValue(self): 85 | return np.median(self.deque) 86 | 87 | def GetAverageValue(self): 88 | return np.mean(self.deque) 89 | 90 | def GetGlobalAverageValue(self): 91 | return self.total / self.count 92 | 93 | 94 | def send_email(subject, body, to): 95 | s = smtplib.SMTP('localhost') 96 | mime = MIMEText(body) 97 | mime['Subject'] = subject 98 | mime['To'] = to 99 | s.sendmail('detectron', to, mime.as_string()) 100 | 101 | 102 | def setup_logging(name): 103 | FORMAT = '%(levelname)s %(filename)s:%(lineno)4d: %(message)s' 104 | # Manually clear root loggers to prevent any module that may have called 105 | # logging.basicConfig() from blocking our logging setup 106 | logging.root.handlers = [] 107 | logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout) 108 | logger = logging.getLogger(name) 109 | return logger 110 | -------------------------------------------------------------------------------- /lib/utils/resnet_weights_helper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for converting resnet pretrained weights from other formats 3 | """ 4 | import os 5 | import pickle 6 | 7 | import torch 8 | 9 | import nn as mynn 10 | import utils.detectron_weight_helper as dwh 11 | from core.config import cfg 12 | 13 | 14 | def load_pretrained_imagenet_weights(model): 15 | """Load pretrained weights 16 | Args: 17 | num_layers: 50 for res50 and so on. 
18 | model: the generalized rcnnn module 19 | """ 20 | _, ext = os.path.splitext(cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS) 21 | if ext == '.pkl': 22 | with open(cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS, 'rb') as fp: 23 | src_blobs = pickle.load(fp, encoding='latin1') 24 | if 'blobs' in src_blobs: 25 | src_blobs = src_blobs['blobs'] 26 | pretrianed_state_dict = src_blobs 27 | else: 28 | weights_file = os.path.join(cfg.ROOT_DIR, cfg.RESNETS.IMAGENET_PRETRAINED_WEIGHTS) 29 | pretrianed_state_dict = convert_state_dict(torch.load(weights_file)) 30 | 31 | # Convert batchnorm weights 32 | for name, mod in model.named_modules(): 33 | if isinstance(mod, mynn.AffineChannel2d): 34 | if cfg.FPN.FPN_ON: 35 | pretrianed_name = name.split('.', 2)[-1] 36 | else: 37 | pretrianed_name = name.split('.', 1)[-1] 38 | bn_mean = pretrianed_state_dict[pretrianed_name + '.running_mean'] 39 | bn_var = pretrianed_state_dict[pretrianed_name + '.running_var'] 40 | scale = pretrianed_state_dict[pretrianed_name + '.weight'] 41 | bias = pretrianed_state_dict[pretrianed_name + '.bias'] 42 | std = torch.sqrt(bn_var + 1e-5) 43 | new_scale = scale / std 44 | new_bias = bias - bn_mean * scale / std 45 | pretrianed_state_dict[pretrianed_name + '.weight'] = new_scale 46 | pretrianed_state_dict[pretrianed_name + '.bias'] = new_bias 47 | 48 | model_state_dict = model.state_dict() 49 | 50 | pattern = dwh.resnet_weights_name_pattern() 51 | 52 | name_mapping, _ = model.detectron_weight_mapping 53 | 54 | for k, v in name_mapping.items(): 55 | if isinstance(v, str): # maybe a str, None or True 56 | if pattern.match(v): 57 | if cfg.FPN.FPN_ON: 58 | pretrianed_key = k.split('.', 2)[-1] 59 | else: 60 | pretrianed_key = k.split('.', 1)[-1] 61 | if ext == '.pkl': 62 | model_state_dict[k].copy_(torch.Tensor(pretrianed_state_dict[v])) 63 | else: 64 | model_state_dict[k].copy_(pretrianed_state_dict[pretrianed_key]) 65 | 66 | 67 | def convert_state_dict(src_dict): 68 | """Return the correct mapping of tensor name and 
value 69 | 70 | Mapping from the names of torchvision model to our resnet conv_body and box_head. 71 | """ 72 | dst_dict = {} 73 | for k, v in src_dict.items(): 74 | toks = k.split('.') 75 | if k.startswith('layer'): 76 | assert len(toks[0]) == 6 77 | res_id = int(toks[0][5]) + 1 78 | name = '.'.join(['res%d' % res_id] + toks[1:]) 79 | dst_dict[name] = v 80 | elif k.startswith('fc'): 81 | continue 82 | else: 83 | name = '.'.join(['res1'] + toks) 84 | dst_dict[name] = v 85 | return dst_dict 86 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import time 7 | 8 | 9 | class Timer(object): 10 | """A simple timer.""" 11 | 12 | def __init__(self): 13 | self.reset() 14 | 15 | def tic(self): 16 | # using time.time instead of time.clock because time time.clock 17 | # does not normalize for multithreading 18 | self.start_time = time.time() 19 | 20 | def toc(self, average=True): 21 | self.diff = time.time() - self.start_time 22 | self.total_time += self.diff 23 | self.calls += 1 24 | self.average_time = self.total_time / self.calls 25 | if average: 26 | return self.average_time 27 | else: 28 | return self.diff 29 | 30 | def reset(self): 31 | self.total_time = 0. 32 | self.calls = 0 33 | self.start_time = 0. 34 | self.diff = 0. 35 | self.average_time = 0. 36 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | """Add {PROJECT_ROOT}/lib. 
to PYTHONPATH 2 | 3 | Usage: 4 | import this module before import any modules under lib/ 5 | e.g 6 | import _init_paths 7 | from core.config import cfg 8 | """ 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | 14 | def add_path(path): 15 | if path not in sys.path: 16 | sys.path.insert(0, path) 17 | 18 | this_dir = osp.abspath(osp.dirname(osp.dirname(__file__))) 19 | 20 | # Add lib to PYTHONPATH 21 | lib_path = osp.join(this_dir, 'lib') 22 | add_path(lib_path) 23 | -------------------------------------------------------------------------------- /tools/download_imagenet_weights.py: -------------------------------------------------------------------------------- 1 | """Script to downlaod ImageNet pretrained weights from Google Drive 2 | 3 | Extra packages required to run the script: 4 | colorama, argparse_color_formatter 5 | """ 6 | 7 | import argparse 8 | import os 9 | import requests 10 | from argparse_color_formatter import ColorHelpFormatter 11 | from colorama import init, Fore 12 | 13 | import _init_paths # pylint: disable=unused-import 14 | from core.config import cfg 15 | 16 | 17 | def parse_args(): 18 | """Parser command line argumnets""" 19 | parser = argparse.ArgumentParser(formatter_class=ColorHelpFormatter) 20 | parser.add_argument('--output_dir', help='Directory to save downloaded weight files', 21 | default=os.path.join(cfg.DATA_DIR, 'pretrained_model')) 22 | parser.add_argument('-t', '--targets', nargs='+', metavar='file_name', 23 | help='Files to download. 
Allowed values are: ' + 24 | ', '.join(map(lambda s: Fore.YELLOW + s + Fore.RESET, 25 | list(PRETRAINED_WEIGHTS.keys()))), 26 | choices=list(PRETRAINED_WEIGHTS.keys()), 27 | default=list(PRETRAINED_WEIGHTS.keys())) 28 | return parser.parse_args() 29 | 30 | 31 | # ---------------------------------------------------------------------------- # 32 | # Mapping from filename to google drive file_id 33 | # ---------------------------------------------------------------------------- # 34 | PRETRAINED_WEIGHTS = { 35 | 'resnet50_caffe.pth': '1wHSvusQ1CiEMc5Nx5R8adqoHQjIDWXl1', 36 | 'resnet101_caffe.pth': '1x2fTMqLrn63EMW0VuK4GEa2eQKzvJ_7l', 37 | 'resnet152_caffe.pth': '1NSCycOb7pU0KzluH326zmyMFUU55JslF', 38 | 'vgg16_caffe.pth': '19UphT53C0Ua9JAtICnw84PPTa3sZZ_9k', 39 | } 40 | 41 | 42 | # ---------------------------------------------------------------------------- # 43 | # Helper fucntions for download file from google drive 44 | # ---------------------------------------------------------------------------- # 45 | 46 | def download_file_from_google_drive(id, destination): 47 | URL = "https://docs.google.com/uc?export=download" 48 | 49 | session = requests.Session() 50 | 51 | response = session.get(URL, params={'id': id}, stream=True) 52 | token = get_confirm_token(response) 53 | 54 | if token: 55 | params = {'id': id, 'confirm': token} 56 | response = session.get(URL, params=params, stream=True) 57 | 58 | save_response_content(response, destination) 59 | 60 | 61 | def get_confirm_token(response): 62 | for key, value in response.cookies.items(): 63 | if key.startswith('download_warning'): 64 | return value 65 | 66 | return None 67 | 68 | 69 | def save_response_content(response, destination): 70 | CHUNK_SIZE = 32768 71 | 72 | with open(destination, "wb") as f: 73 | for chunk in response.iter_content(CHUNK_SIZE): 74 | if chunk: # filter out keep-alive new chunks 75 | f.write(chunk) 76 | 77 | 78 | def main(): 79 | init() # colorama init. 
Only has effect on Windows 80 | args = parse_args() 81 | for filename in args.targets: 82 | file_id = PRETRAINED_WEIGHTS[filename] 83 | if not os.path.exists(args.output_dir): 84 | os.makedirs(args.output_dir) 85 | destination = os.path.join(args.output_dir, filename) 86 | download_file_from_google_drive(file_id, destination) 87 | print('Download {} to {}'.format(filename, destination)) 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | """Perform inference on one or more datasets.""" 2 | 3 | import argparse 4 | import cv2 5 | import os 6 | import pprint 7 | import sys 8 | import time 9 | 10 | import torch 11 | 12 | import _init_paths # pylint: disable=unused-import 13 | from core.config import cfg, merge_cfg_from_file, merge_cfg_from_list, assert_and_infer_cfg 14 | from core.test_engine import run_inference 15 | import utils.logging 16 | 17 | # OpenCL may be enabled by default in OpenCV3; disable it because it's not 18 | # thread safe and causes unwanted GPU memory allocations. 19 | cv2.ocl.setUseOpenCL(False) 20 | 21 | 22 | def parse_args(): 23 | """Parse in command line arguments""" 24 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 25 | parser.add_argument( 26 | '--dataset', 27 | help='training dataset') 28 | parser.add_argument( 29 | '--cfg', dest='cfg_file', required=True, 30 | help='optional config file') 31 | 32 | parser.add_argument( 33 | '--load_ckpt', help='path of checkpoint to load') 34 | parser.add_argument( 35 | '--load_detectron', help='path to the detectron weight pickle file') 36 | 37 | parser.add_argument( 38 | '--output_dir', 39 | help='output directory to save the testing results. 
if __name__ == '__main__':

    # Inference requires a GPU; bail out early otherwise.
    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    logger = utils.logging.setup_logging(__name__)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)

    # Exactly one of: single visible GPU, or --multi-gpu-testing.
    assert (torch.cuda.device_count() == 1) ^ bool(args.multi_gpu_testing)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    if args.output_dir is None:
        # Default output directory: <checkpoint_dir>/../test
        ckpt_path = args.load_ckpt if args.load_ckpt else args.load_detectron
        args.output_dir = os.path.join(
            os.path.dirname(os.path.dirname(ckpt_path)), 'test')
        logger.info('Automatically set output directory to %s', args.output_dir)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    cfg.VIS = args.vis

    # Apply the config file first, then --set overrides on top of it.
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        merge_cfg_from_list(args.set_cfgs)

    if args.dataset == "coco2017":
        cfg.TEST.DATASETS = ('coco_2017_val',)
        cfg.MODEL.NUM_CLASSES = 81
    elif args.dataset == "keypoints_coco2017":
        cfg.TEST.DATASETS = ('keypoints_coco_2017_val',)
        cfg.MODEL.NUM_CLASSES = 2
    else:  # For subprocess call
        assert cfg.TEST.DATASETS, 'cfg.TEST.DATASETS shouldn\'t be empty'
    assert_and_infer_cfg()

    logger.info('Testing with config:')
    logger.info(pprint.pformat(cfg))

    # For test_engine.multi_gpu_test_net_on_dataset
    args.test_net_file, _ = os.path.splitext(__file__)
    # manually set args.cuda
    args.cuda = True

    run_inference(
        args,
        ind_range=args.range,
        multi_gpu_testing=args.multi_gpu_testing,
        check_expected_results=True)