├── lib ├── core │ └── __init__.py ├── ops │ ├── __init__.py │ ├── zero_even_op.h │ ├── zero_even_op.cc │ ├── zero_even_op.cu │ └── generate_proposal_labels.py ├── utils │ ├── __init__.py │ ├── image.py │ ├── timer.py │ ├── coordinator.py │ ├── collections.py │ ├── logging.py │ ├── cython_bbox.pyx │ └── env.py ├── datasets │ ├── __init__.py │ ├── cityscapes │ │ ├── __init__.py │ │ └── coco_to_cityscapes_id.py │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── xVOCap.m │ │ └── voc_eval.m │ ├── dummy_datasets.py │ ├── data │ │ └── README.md │ └── cityscapes_json_dataset_evaluator.py ├── roi_data │ └── __init__.py ├── modeling │ ├── __init__.py │ ├── VGG_CNN_M_1024.py │ ├── name_compat.py │ ├── VGG16.py │ └── rfcn_heads.py ├── Makefile ├── cmake │ ├── Summary.cmake │ ├── Dependencies.cmake │ └── Modules │ │ └── FindCuDNN.cmake ├── CMakeLists.txt └── setup.py ├── demo ├── 15673749081_767a7fa63a_k.jpg ├── 16004479832_a748d55f21_k.jpg ├── 17790319373_bd19b24cfc_k.jpg ├── 18124840932_e42b3e377c_k.jpg ├── 19064748793_bb942deea1_k.jpg ├── 24274813513_0cfd2ce6d0_k.jpg ├── 33823288584_1d21cf0a26_k.jpg ├── 33887522274_eebd074106_k.jpg ├── 34501842524_3c858b3080_k.jpg ├── output │ ├── 17790319373_bd19b24cfc_k_example_output.jpg │ └── 33823288584_1d21cf0a26_k_example_output.jpg └── NOTICE ├── .gitignore ├── configs ├── 12_2017_baselines │ ├── rpn_R-50-C4_1x.yaml │ ├── rpn_R-101-FPN_1x.yaml │ ├── rpn_R-50-FPN_1x.yaml │ ├── e2e_faster_rcnn_R-50-C4_1x.yaml │ ├── e2e_faster_rcnn_R-50-C4_2x.yaml │ ├── rpn_person_only_R-101-FPN_1x.yaml │ ├── rpn_person_only_R-50-FPN_1x.yaml │ ├── e2e_faster_rcnn_R-101-FPN_1x.yaml │ ├── e2e_faster_rcnn_R-50-FPN_1x.yaml │ ├── e2e_faster_rcnn_R-50-FPN_2x.yaml │ ├── e2e_faster_rcnn_R-101-FPN_2x.yaml │ ├── retinanet_R-50-FPN_1x.yaml │ ├── retinanet_R-101-FPN_1x.yaml │ ├── retinanet_R-50-FPN_2x.yaml │ ├── retinanet_R-101-FPN_2x.yaml │ ├── rpn_X-101-32x8d-FPN_1x.yaml │ ├── rpn_X-101-64x4d-FPN_1x.yaml │ ├── rpn_person_only_X-101-32x8d-FPN_1x.yaml │ 
├── rpn_person_only_X-101-64x4d-FPN_1x.yaml │ ├── e2e_mask_rcnn_R-50-C4_1x.yaml │ ├── e2e_mask_rcnn_R-50-C4_2x.yaml │ ├── retinanet_X-101-32x8d-FPN_1x.yaml │ ├── retinanet_X-101-32x8d-FPN_2x.yaml │ ├── retinanet_X-101-64x4d-FPN_1x.yaml │ ├── retinanet_X-101-64x4d-FPN_2x.yaml │ ├── e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml │ ├── e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml │ ├── e2e_mask_rcnn_R-101-FPN_1x.yaml │ ├── e2e_mask_rcnn_R-50-FPN_1x.yaml │ ├── e2e_mask_rcnn_R-50-FPN_2x.yaml │ ├── e2e_mask_rcnn_R-101-FPN_2x.yaml │ ├── fast_rcnn_R-50-C4_1x.yaml │ ├── fast_rcnn_R-50-C4_2x.yaml │ ├── fast_rcnn_R-50-FPN_1x.yaml │ ├── fast_rcnn_R-50-FPN_2x.yaml │ ├── fast_rcnn_R-101-FPN_1x.yaml │ ├── fast_rcnn_R-101-FPN_2x.yaml │ ├── e2e_keypoint_rcnn_R-101-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_R-50-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_R-50-FPN_s1x.yaml │ ├── e2e_keypoint_rcnn_R-101-FPN_s1x.yaml │ ├── e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml │ ├── mask_rcnn_R-50-C4_1x.yaml │ ├── mask_rcnn_R-50-C4_2x.yaml │ ├── e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml │ ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml │ ├── fast_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── fast_rcnn_X-101-32x8d-FPN_2x.yaml │ ├── fast_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── fast_rcnn_X-101-64x4d-FPN_2x.yaml │ ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml │ ├── mask_rcnn_R-50-FPN_1x.yaml │ ├── mask_rcnn_R-101-FPN_1x.yaml │ ├── mask_rcnn_R-50-FPN_2x.yaml │ ├── mask_rcnn_R-101-FPN_2x.yaml │ ├── keypoint_rcnn_R-50-FPN_1x.yaml │ ├── keypoint_rcnn_R-50-FPN_s1x.yaml │ ├── keypoint_rcnn_R-101-FPN_1x.yaml │ ├── keypoint_rcnn_R-101-FPN_s1x.yaml │ ├── mask_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── mask_rcnn_X-101-32x8d-FPN_2x.yaml │ ├── mask_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── 
mask_rcnn_X-101-64x4d-FPN_2x.yaml │ ├── keypoint_rcnn_X-101-32x8d-FPN_1x.yaml │ ├── keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml │ ├── keypoint_rcnn_X-101-64x4d-FPN_1x.yaml │ ├── keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml │ └── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml ├── getting_started │ ├── tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml │ ├── tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml │ ├── tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml │ └── tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml └── test_time_aug │ ├── e2e_mask_rcnn_R-50-FPN_2x.yaml │ └── keypoint_rcnn_R-50-FPN_1x.yaml ├── docker └── Dockerfile ├── .github └── issue_template.md ├── NOTICE ├── CONTRIBUTING.md ├── tools ├── convert_selective_search.py └── generate_testdev_from_test.py ├── tests ├── test_smooth_l1_loss_op.py └── test_spatial_narrow_as_op.py └── FAQ.md /lib/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/roi_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- 
/lib/datasets/cityscapes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/15673749081_767a7fa63a_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidagarwal04/Detectron/master/demo/15673749081_767a7fa63a_k.jpg -------------------------------------------------------------------------------- /demo/16004479832_a748d55f21_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidagarwal04/Detectron/master/demo/16004479832_a748d55f21_k.jpg -------------------------------------------------------------------------------- /demo/17790319373_bd19b24cfc_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidagarwal04/Detectron/master/demo/17790319373_bd19b24cfc_k.jpg -------------------------------------------------------------------------------- /demo/18124840932_e42b3e377c_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidagarwal04/Detectron/master/demo/18124840932_e42b3e377c_k.jpg -------------------------------------------------------------------------------- /demo/19064748793_bb942deea1_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidagarwal04/Detectron/master/demo/19064748793_bb942deea1_k.jpg -------------------------------------------------------------------------------- /demo/24274813513_0cfd2ce6d0_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidagarwal04/Detectron/master/demo/24274813513_0cfd2ce6d0_k.jpg -------------------------------------------------------------------------------- 
/demo/33823288584_1d21cf0a26_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidagarwal04/Detectron/master/demo/33823288584_1d21cf0a26_k.jpg -------------------------------------------------------------------------------- /demo/33887522274_eebd074106_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidagarwal04/Detectron/master/demo/33887522274_eebd074106_k.jpg -------------------------------------------------------------------------------- /demo/34501842524_3c858b3080_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidagarwal04/Detectron/master/demo/34501842524_3c858b3080_k.jpg -------------------------------------------------------------------------------- /demo/output/17790319373_bd19b24cfc_k_example_output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidagarwal04/Detectron/master/demo/output/17790319373_bd19b24cfc_k_example_output.jpg -------------------------------------------------------------------------------- /demo/output/33823288584_1d21cf0a26_k_example_output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidagarwal04/Detectron/master/demo/output/33823288584_1d21cf0a26_k_example_output.jpg -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | 
-------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Shared objects 7 | *.so 8 | 9 | # Distribution / packaging 10 | lib/build/ 11 | *.egg-info/ 12 | *.egg 13 | 14 | # Temporary files 15 | *.swn 16 | *.swo 17 | *.swp 18 | 19 | # Dataset symlinks 20 | lib/datasets/data/* 21 | !lib/datasets/data/README.md 22 | 23 | # Generated C files 24 | lib/utils/cython_*.c 25 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | # Don't use the --user flag for setup.py develop mode with virtualenv. 2 | DEV_USER_FLAG=$(shell python2 -c "import sys; print('' if hasattr(sys, 'real_prefix') else '--user')") 3 | 4 | .PHONY: default 5 | default: dev 6 | 7 | .PHONY: install 8 | install: 9 | python2 setup.py install 10 | 11 | .PHONY: ops 12 | ops: 13 | mkdir -p build && cd build && cmake .. 
&& make -j$(shell nproc) 14 | 15 | .PHONY: dev 16 | dev: 17 | python2 setup.py develop $(DEV_USER_FLAG) 18 | 19 | .PHONY: clean 20 | clean: 21 | python2 setup.py develop --uninstall $(DEV_USER_FLAG) 22 | rm -rf build 23 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/rpn_R-50-C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: rpn 3 | CONV_BODY: ResNet.add_ResNet50_conv4_body 4 | NUM_CLASSES: 81 5 | RPN_ONLY: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | RPN: 15 | SIZES: (32, 64, 128, 256, 512) 16 | TRAIN: 17 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 18 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 19 | SCALES: (800,) 20 | MAX_SIZE: 1333 21 | TEST: 22 | DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') 23 | SCALE: 800 24 | MAX_SIZE: 1333 25 | USE_NCCL: False 26 | OUTPUT_DIR: . 
27 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use Caffe2 image as parent image 2 | FROM caffe2/caffe2:snapshot-py2-cuda9.0-cudnn7-ubuntu16.04 3 | 4 | RUN mv /usr/local/caffe2 /usr/local/caffe2_build 5 | ENV Caffe2_DIR /usr/local/caffe2_build 6 | 7 | ENV PYTHONPATH /usr/local/caffe2_build:${PYTHONPATH} 8 | ENV LD_LIBRARY_PATH /usr/local/caffe2_build/lib:${LD_LIBRARY_PATH} 9 | 10 | # Install Python dependencies (version specifiers are quoted so the shell does not parse '>' as an output redirect) 11 | RUN pip install 'numpy>=1.13' 'pyyaml>=3.12' matplotlib 'opencv-python>=3.2' setuptools Cython mock scipy 12 | 13 | # Install the COCO API 14 | RUN git clone https://github.com/cocodataset/cocoapi.git /cocoapi 15 | WORKDIR /cocoapi/PythonAPI 16 | RUN make install 17 | 18 | # Clone the Detectron repository 19 | RUN git clone https://github.com/facebookresearch/detectron /detectron 20 | 21 | # Set up Python modules 22 | WORKDIR /detectron/lib 23 | RUN make 24 | 25 | # Build custom ops 26 | RUN make ops 27 | 28 | # Go to Detectron root 29 | WORKDIR /detectron 30 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/rpn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | RPN_ONLY: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_RPN: True 17 | RPN_MAX_LEVEL: 6 18 | RPN_MIN_LEVEL: 2 19 | RPN_ANCHOR_START_SIZE: 32 20 | RPN_ASPECT_RATIOS: (0.5, 1, 2) 21 | TRAIN: 22 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 23 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 24 | SCALES: (800,) 25 | 
MAX_SIZE: 1333 26 | TEST: 27 | DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') 28 | SCALE: 800 29 | MAX_SIZE: 1333 30 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 31 | RPN_POST_NMS_TOP_N: 2000 32 | OUTPUT_DIR: . 33 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/rpn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | RPN_ONLY: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_RPN: True 17 | RPN_MAX_LEVEL: 6 18 | RPN_MIN_LEVEL: 2 19 | RPN_ANCHOR_START_SIZE: 32 20 | RPN_ASPECT_RATIOS: (0.5, 1, 2) 21 | TRAIN: 22 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 23 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 24 | SCALES: (800,) 25 | MAX_SIZE: 1333 26 | TEST: 27 | DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') 28 | SCALE: 800 29 | MAX_SIZE: 1333 30 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 31 | RPN_POST_NMS_TOP_N: 2000 32 | OUTPUT_DIR: . 
33 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.add_ResNet50_conv4_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | RPN: 16 | SIZES: (32, 64, 128, 256, 512) 17 | FAST_RCNN: 18 | ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head 19 | ROI_XFORM_METHOD: RoIAlign 20 | TRAIN: 21 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 22 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 23 | SCALES: (800,) 24 | MAX_SIZE: 1333 25 | IMS_PER_BATCH: 1 26 | BATCH_SIZE_PER_IM: 512 27 | TEST: 28 | DATASETS: ('coco_2014_minival',) 29 | SCALE: 800 30 | MAX_SIZE: 1333 31 | NMS: 0.5 32 | RPN_PRE_NMS_TOP_N: 6000 33 | RPN_POST_NMS_TOP_N: 1000 34 | OUTPUT_DIR: . 
35 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.add_ResNet50_conv4_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | RPN: 16 | SIZES: (32, 64, 128, 256, 512) 17 | FAST_RCNN: 18 | ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head 19 | ROI_XFORM_METHOD: RoIAlign 20 | TRAIN: 21 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 22 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 23 | SCALES: (800,) 24 | MAX_SIZE: 1333 25 | IMS_PER_BATCH: 1 26 | BATCH_SIZE_PER_IM: 512 27 | TEST: 28 | DATASETS: ('coco_2014_minival',) 29 | SCALE: 800 30 | MAX_SIZE: 1333 31 | NMS: 0.5 32 | RPN_PRE_NMS_TOP_N: 6000 33 | RPN_POST_NMS_TOP_N: 1000 34 | OUTPUT_DIR: . 35 | -------------------------------------------------------------------------------- /lib/cmake/Summary.cmake: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Summary.cmake 2 | 3 | # Prints configuration summary. 
4 | function (detectron_print_config_summary) 5 | message(STATUS "Summary:") 6 | message(STATUS " CMake version : ${CMAKE_VERSION}") 7 | message(STATUS " CMake command : ${CMAKE_COMMAND}") 8 | message(STATUS " System name : ${CMAKE_SYSTEM_NAME}") 9 | message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}") 10 | message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}") 11 | message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}") 12 | message(STATUS " Caffe2 version : ${CAFFE2_VERSION}") 13 | message(STATUS " Caffe2 include path : ${CAFFE2_INCLUDE_DIRS}") 14 | message(STATUS " Have CUDA : ${HAVE_CUDA}") 15 | if (${HAVE_CUDA}) 16 | message(STATUS " CUDA version : ${CUDA_VERSION}") 17 | message(STATUS " CuDNN version : ${CUDNN_VERSION}") 18 | endif() 19 | endfunction() 20 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | RPN_ONLY: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_RPN: True 17 | RPN_MAX_LEVEL: 6 18 | RPN_MIN_LEVEL: 2 19 | RPN_ANCHOR_START_SIZE: 32 20 | RPN_ASPECT_RATIOS: (0.5, 1, 2) 21 | TRAIN: 22 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 23 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 24 | SCALES: (800,) 25 | MAX_SIZE: 1333 26 | TEST: 27 | DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test') 28 | SCALE: 800 29 | MAX_SIZE: 1333 30 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 31 | RPN_POST_NMS_TOP_N: 2000 32 | OUTPUT_DIR: . 
33 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 2 5 | RPN_ONLY: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_RPN: True 17 | RPN_MAX_LEVEL: 6 18 | RPN_MIN_LEVEL: 2 19 | RPN_ANCHOR_START_SIZE: 32 20 | RPN_ASPECT_RATIOS: (0.5, 1, 2) 21 | TRAIN: 22 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 23 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 24 | SCALES: (800,) 25 | MAX_SIZE: 1333 26 | TEST: 27 | DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test') 28 | SCALE: 800 29 | MAX_SIZE: 1333 30 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 31 | RPN_POST_NMS_TOP_N: 2000 32 | OUTPUT_DIR: . 33 | -------------------------------------------------------------------------------- /demo/NOTICE: -------------------------------------------------------------------------------- 1 | The demo images are licensed as United States government work: 2 | https://www.usa.gov/government-works 3 | 4 | The image files were obtained on Jan 13, 2018 from the following 5 | URLs. 
6 | 7 | 16004479832_a748d55f21_k.jpg 8 | https://www.flickr.com/photos/archivesnews/16004479832 9 | 10 | 18124840932_e42b3e377c_k.jpg 11 | https://www.flickr.com/photos/usnavy/18124840932 12 | 13 | 33887522274_eebd074106_k.jpg 14 | https://www.flickr.com/photos/usaid_pakistan/33887522274 15 | 16 | 15673749081_767a7fa63a_k.jpg 17 | https://www.flickr.com/photos/usnavy/15673749081 18 | 19 | 34501842524_3c858b3080_k.jpg 20 | https://www.flickr.com/photos/departmentofenergy/34501842524 21 | 22 | 24274813513_0cfd2ce6d0_k.jpg 23 | https://www.flickr.com/photos/dhsgov/24274813513 24 | 25 | 19064748793_bb942deea1_k.jpg 26 | https://www.flickr.com/photos/statephotos/19064748793 27 | 28 | 33823288584_1d21cf0a26_k.jpg 29 | https://www.flickr.com/photos/cbpphotos/33823288584 30 | 31 | 17790319373_bd19b24cfc_k.jpg 32 | https://www.flickr.com/photos/secdef/17790319373 33 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | ROI_XFORM_RESOLUTION: 7 22 | ROI_XFORM_SAMPLING_RATIO: 2 23 | TRAIN: 24 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 25 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 26 | SCALES: (800,) 27 | MAX_SIZE: 1333 28 | BATCH_SIZE_PER_IM: 512 29 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 30 | TEST: 31 | DATASETS: ('coco_2014_minival',) 32 | SCALE: 800 33 | 
MAX_SIZE: 1333 34 | NMS: 0.5 35 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 36 | RPN_POST_NMS_TOP_N: 1000 37 | OUTPUT_DIR: . 38 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | ROI_XFORM_RESOLUTION: 7 22 | ROI_XFORM_SAMPLING_RATIO: 2 23 | TRAIN: 24 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 25 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 26 | SCALES: (800,) 27 | MAX_SIZE: 1333 28 | BATCH_SIZE_PER_IM: 512 29 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 30 | TEST: 31 | DATASETS: ('coco_2014_minival',) 32 | SCALE: 800 33 | MAX_SIZE: 1333 34 | NMS: 0.5 35 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 36 | RPN_POST_NMS_TOP_N: 1000 37 | OUTPUT_DIR: . 
38 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | ROI_XFORM_RESOLUTION: 7 22 | ROI_XFORM_SAMPLING_RATIO: 2 23 | TRAIN: 24 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 25 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 26 | SCALES: (800,) 27 | MAX_SIZE: 1333 28 | BATCH_SIZE_PER_IM: 512 29 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 30 | TEST: 31 | DATASETS: ('coco_2014_minival',) 32 | SCALE: 800 33 | MAX_SIZE: 1333 34 | NMS: 0.5 35 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 36 | RPN_POST_NMS_TOP_N: 1000 37 | OUTPUT_DIR: . 
38 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | ROI_XFORM_RESOLUTION: 7 22 | ROI_XFORM_SAMPLING_RATIO: 2 23 | TRAIN: 24 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 25 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 26 | SCALES: (800,) 27 | MAX_SIZE: 1333 28 | BATCH_SIZE_PER_IM: 512 29 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 30 | TEST: 31 | DATASETS: ('coco_2014_minival',) 32 | SCALE: 800 33 | MAX_SIZE: 1333 34 | NMS: 0.5 35 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 36 | RPN_POST_NMS_TOP_N: 1000 37 | OUTPUT_DIR: . 
38 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/retinanet_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: retinanet 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.01 10 | GAMMA: 0.1 11 | MAX_ITER: 90000 12 | STEPS: [0, 60000, 80000] 13 | FPN: 14 | FPN_ON: True 15 | MULTILEVEL_RPN: True 16 | RPN_MAX_LEVEL: 7 17 | RPN_MIN_LEVEL: 3 18 | COARSEST_STRIDE: 128 19 | EXTRA_CONV_LEVELS: True 20 | RETINANET: 21 | RETINANET_ON: True 22 | NUM_CONVS: 4 23 | ASPECT_RATIOS: (1.0, 2.0, 0.5) 24 | SCALES_PER_OCTAVE: 3 25 | ANCHOR_SCALE: 4 26 | LOSS_GAMMA: 2.0 27 | LOSS_ALPHA: 0.25 28 | TRAIN: 29 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 30 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 31 | SCALES: (800,) 32 | MAX_SIZE: 1333 33 | RPN_STRADDLE_THRESH: -1 # default 0 34 | TEST: 35 | DATASETS: ('coco_2014_minival',) 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 10000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 2000 41 | OUTPUT_DIR: . 42 | -------------------------------------------------------------------------------- /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | ## PLEASE FOLLOW THESE INSTRUCTIONS BEFORE POSTING 2 | 1. Please thoroughly read README.md, INSTALL.md, GETTING_STARTED.md, and FAQ.md 3 | 2. Please search existing *open and closed* issues in case your issue has already been reported 4 | 3. 
Please try to debug the issue in case you can solve it on your own before posting 5 | 6 | ## After following steps 1-3 above and agreeing to provide the detailed information requested below, you may continue with posting your issue 7 | (**Delete this line and the text above it.**) 8 | 9 | ### Expected results 10 | 11 | What did you expect to see? 12 | 13 | ### Actual results 14 | 15 | What did you observe instead? 16 | 17 | ### Detailed steps to reproduce 18 | 19 | E.g.: 20 | 21 | ``` 22 | The command that you ran 23 | ``` 24 | 25 | ### System information 26 | 27 | * Operating system: ? 28 | * Compiler version: ? 29 | * CUDA version: ? 30 | * cuDNN version: ? 31 | * NVIDIA driver version: ? 32 | * GPU models (for all devices if they are not all the same): ? 33 | * `PYTHONPATH` environment variable: ? 34 | * `python --version` output: ? 35 | * Anything else that seems relevant: ? 36 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/retinanet_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: retinanet 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.01 10 | GAMMA: 0.1 11 | MAX_ITER: 90000 12 | STEPS: [0, 60000, 80000] 13 | FPN: 14 | FPN_ON: True 15 | MULTILEVEL_RPN: True 16 | RPN_MAX_LEVEL: 7 17 | RPN_MIN_LEVEL: 3 18 | COARSEST_STRIDE: 128 19 | EXTRA_CONV_LEVELS: True 20 | RETINANET: 21 | RETINANET_ON: True 22 | NUM_CONVS: 4 23 | ASPECT_RATIOS: (1.0, 2.0, 0.5) 24 | SCALES_PER_OCTAVE: 3 25 | ANCHOR_SCALE: 4 26 | LOSS_GAMMA: 2.0 27 | LOSS_ALPHA: 0.25 28 | TRAIN: 29 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 30 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 31 | SCALES: (800,) 32 | MAX_SIZE: 1333 33 | RPN_STRADDLE_THRESH: -1 # default 0 34 | TEST: 35 | DATASETS: 
('coco_2014_minival',) 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 10000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 2000 41 | OUTPUT_DIR: . 42 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/retinanet_R-50-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: retinanet 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.01 10 | GAMMA: 0.1 11 | MAX_ITER: 180000 12 | STEPS: [0, 120000, 160000] 13 | FPN: 14 | FPN_ON: True 15 | MULTILEVEL_RPN: True 16 | RPN_MAX_LEVEL: 7 17 | RPN_MIN_LEVEL: 3 18 | COARSEST_STRIDE: 128 19 | EXTRA_CONV_LEVELS: True 20 | RETINANET: 21 | RETINANET_ON: True 22 | NUM_CONVS: 4 23 | ASPECT_RATIOS: (1.0, 2.0, 0.5) 24 | SCALES_PER_OCTAVE: 3 25 | ANCHOR_SCALE: 4 26 | LOSS_GAMMA: 2.0 27 | LOSS_ALPHA: 0.25 28 | TRAIN: 29 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 30 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 31 | SCALES: (800,) 32 | MAX_SIZE: 1333 33 | RPN_STRADDLE_THRESH: -1 # default 0 34 | TEST: 35 | DATASETS: ('coco_2014_minival',) 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 10000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 2000 41 | OUTPUT_DIR: . 
42 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/retinanet_R-101-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: retinanet 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.01 10 | GAMMA: 0.1 11 | MAX_ITER: 180000 12 | STEPS: [0, 120000, 160000] 13 | FPN: 14 | FPN_ON: True 15 | MULTILEVEL_RPN: True 16 | RPN_MAX_LEVEL: 7 17 | RPN_MIN_LEVEL: 3 18 | COARSEST_STRIDE: 128 19 | EXTRA_CONV_LEVELS: True 20 | RETINANET: 21 | RETINANET_ON: True 22 | NUM_CONVS: 4 23 | ASPECT_RATIOS: (1.0, 2.0, 0.5) 24 | SCALES_PER_OCTAVE: 3 25 | ANCHOR_SCALE: 4 26 | LOSS_GAMMA: 2.0 27 | LOSS_ALPHA: 0.25 28 | TRAIN: 29 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 30 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 31 | SCALES: (800,) 32 | MAX_SIZE: 1333 33 | RPN_STRADDLE_THRESH: -1 # default 0 34 | TEST: 35 | DATASETS: ('coco_2014_minival',) 36 | SCALE: 800 37 | MAX_SIZE: 1333 38 | NMS: 0.5 39 | RPN_PRE_NMS_TOP_N: 10000 # Per FPN level 40 | RPN_POST_NMS_TOP_N: 2000 41 | OUTPUT_DIR: . 
42 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | RPN_ONLY: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_RPN: True 17 | RPN_MAX_LEVEL: 6 18 | RPN_MIN_LEVEL: 2 19 | RPN_ANCHOR_START_SIZE: 32 20 | RPN_ASPECT_RATIOS: (0.5, 1, 2) 21 | RESNETS: 22 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 23 | TRANS_FUNC: bottleneck_transformation 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | TRAIN: 27 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 28 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 29 | SCALES: (800,) 30 | MAX_SIZE: 1333 31 | TEST: 32 | DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') 33 | SCALE: 800 34 | MAX_SIZE: 1333 35 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 36 | RPN_POST_NMS_TOP_N: 2000 37 | OUTPUT_DIR: . 
38 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | RPN_ONLY: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_RPN: True 17 | RPN_MAX_LEVEL: 6 18 | RPN_MIN_LEVEL: 2 19 | RPN_ANCHOR_START_SIZE: 32 20 | RPN_ASPECT_RATIOS: (0.5, 1, 2) 21 | RESNETS: 22 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 23 | TRANS_FUNC: bottleneck_transformation 24 | NUM_GROUPS: 64 25 | WIDTH_PER_GROUP: 4 26 | TRAIN: 27 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 28 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 29 | SCALES: (800,) 30 | MAX_SIZE: 1333 31 | TEST: 32 | DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') 33 | SCALE: 800 34 | MAX_SIZE: 1333 35 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 36 | RPN_POST_NMS_TOP_N: 2000 37 | OUTPUT_DIR: . 
38 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | RPN_ONLY: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_RPN: True 17 | RPN_MAX_LEVEL: 6 18 | RPN_MIN_LEVEL: 2 19 | RPN_ANCHOR_START_SIZE: 32 20 | RPN_ASPECT_RATIOS: (0.5, 1, 2) 21 | RESNETS: 22 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 23 | TRANS_FUNC: bottleneck_transformation 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | TRAIN: 27 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 28 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 29 | SCALES: (800,) 30 | MAX_SIZE: 1333 31 | TEST: 32 | DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test') 33 | SCALE: 800 34 | MAX_SIZE: 1333 35 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 36 | RPN_POST_NMS_TOP_N: 2000 37 | OUTPUT_DIR: . 
38 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | RPN_ONLY: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_RPN: True 17 | RPN_MAX_LEVEL: 6 18 | RPN_MIN_LEVEL: 2 19 | RPN_ANCHOR_START_SIZE: 32 20 | RPN_ASPECT_RATIOS: (0.5, 1, 2) 21 | RESNETS: 22 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 23 | TRANS_FUNC: bottleneck_transformation 24 | NUM_GROUPS: 64 25 | WIDTH_PER_GROUP: 4 26 | TRAIN: 27 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 28 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 29 | SCALES: (800,) 30 | MAX_SIZE: 1333 31 | TEST: 32 | DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test') 33 | SCALE: 800 34 | MAX_SIZE: 1333 35 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 36 | RPN_POST_NMS_TOP_N: 2000 37 | OUTPUT_DIR: . 
38 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.add_ResNet50_conv4_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | RPN: 17 | SIZES: (32, 64, 128, 256, 512) 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | MRCNN: 22 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 23 | RESOLUTION: 14 24 | ROI_XFORM_METHOD: RoIAlign 25 | ROI_XFORM_RESOLUTION: 14 26 | DILATION: 1 # default 2 27 | CONV_INIT: MSRAFill # default: GaussianFill 28 | TRAIN: 29 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 30 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 31 | SCALES: (800,) 32 | MAX_SIZE: 1333 33 | IMS_PER_BATCH: 1 34 | BATCH_SIZE_PER_IM: 512 35 | TEST: 36 | DATASETS: ('coco_2014_minival',) 37 | SCALE: 800 38 | MAX_SIZE: 1333 39 | NMS: 0.5 40 | RPN_PRE_NMS_TOP_N: 6000 41 | RPN_POST_NMS_TOP_N: 1000 42 | OUTPUT_DIR: . 
43 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.add_ResNet50_conv4_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 14 | MAX_ITER: 360000 15 | STEPS: [0, 240000, 320000] 16 | RPN: 17 | SIZES: (32, 64, 128, 256, 512) 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | MRCNN: 22 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 23 | RESOLUTION: 14 24 | ROI_XFORM_METHOD: RoIAlign 25 | ROI_XFORM_RESOLUTION: 14 26 | DILATION: 1 # default 2 27 | CONV_INIT: MSRAFill # default: GaussianFill 28 | TRAIN: 29 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 30 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 31 | SCALES: (800,) 32 | MAX_SIZE: 1333 33 | IMS_PER_BATCH: 1 34 | BATCH_SIZE_PER_IM: 512 35 | TEST: 36 | DATASETS: ('coco_2014_minival',) 37 | SCALE: 800 38 | MAX_SIZE: 1333 39 | NMS: 0.5 40 | RPN_PRE_NMS_TOP_N: 6000 41 | RPN_POST_NMS_TOP_N: 1000 42 | OUTPUT_DIR: . 
43 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: retinanet 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.01 10 | GAMMA: 0.1 11 | MAX_ITER: 90000 12 | STEPS: [0, 60000, 80000] 13 | FPN: 14 | FPN_ON: True 15 | MULTILEVEL_RPN: True 16 | RPN_MAX_LEVEL: 7 17 | RPN_MIN_LEVEL: 3 18 | COARSEST_STRIDE: 128 19 | EXTRA_CONV_LEVELS: True 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 32 24 | WIDTH_PER_GROUP: 8 25 | RETINANET: 26 | RETINANET_ON: True 27 | NUM_CONVS: 4 28 | ASPECT_RATIOS: (1.0, 2.0, 0.5) 29 | SCALES_PER_OCTAVE: 3 30 | ANCHOR_SCALE: 4 31 | LOSS_GAMMA: 2.0 32 | LOSS_ALPHA: 0.25 33 | TRAIN: 34 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 35 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 36 | SCALES: (800,) 37 | MAX_SIZE: 1333 38 | RPN_STRADDLE_THRESH: -1 # default 0 39 | TEST: 40 | DATASETS: ('coco_2014_minival',) 41 | SCALE: 800 42 | MAX_SIZE: 1333 43 | NMS: 0.5 44 | RPN_PRE_NMS_TOP_N: 10000 # Per FPN level 45 | RPN_POST_NMS_TOP_N: 2000 46 | OUTPUT_DIR: . 
47 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: retinanet 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.01 10 | GAMMA: 0.1 11 | MAX_ITER: 180000 12 | STEPS: [0, 120000, 160000] 13 | FPN: 14 | FPN_ON: True 15 | MULTILEVEL_RPN: True 16 | RPN_MAX_LEVEL: 7 17 | RPN_MIN_LEVEL: 3 18 | COARSEST_STRIDE: 128 19 | EXTRA_CONV_LEVELS: True 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 32 24 | WIDTH_PER_GROUP: 8 25 | RETINANET: 26 | RETINANET_ON: True 27 | NUM_CONVS: 4 28 | ASPECT_RATIOS: (1.0, 2.0, 0.5) 29 | SCALES_PER_OCTAVE: 3 30 | ANCHOR_SCALE: 4 31 | LOSS_GAMMA: 2.0 32 | LOSS_ALPHA: 0.25 33 | TRAIN: 34 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 35 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 36 | SCALES: (800,) 37 | MAX_SIZE: 1333 38 | RPN_STRADDLE_THRESH: -1 # default 0 39 | TEST: 40 | DATASETS: ('coco_2014_minival',) 41 | SCALE: 800 42 | MAX_SIZE: 1333 43 | NMS: 0.5 44 | RPN_PRE_NMS_TOP_N: 10000 # Per FPN level 45 | RPN_POST_NMS_TOP_N: 2000 46 | OUTPUT_DIR: . 
47 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: retinanet 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.01 10 | GAMMA: 0.1 11 | MAX_ITER: 90000 12 | STEPS: [0, 60000, 80000] 13 | FPN: 14 | FPN_ON: True 15 | MULTILEVEL_RPN: True 16 | RPN_MAX_LEVEL: 7 17 | RPN_MIN_LEVEL: 3 18 | COARSEST_STRIDE: 128 19 | EXTRA_CONV_LEVELS: True 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 64 24 | WIDTH_PER_GROUP: 4 25 | RETINANET: 26 | RETINANET_ON: True 27 | NUM_CONVS: 4 28 | ASPECT_RATIOS: (1.0, 2.0, 0.5) 29 | SCALES_PER_OCTAVE: 3 30 | ANCHOR_SCALE: 4 31 | LOSS_GAMMA: 2.0 32 | LOSS_ALPHA: 0.25 33 | TRAIN: 34 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 35 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 36 | SCALES: (800,) 37 | MAX_SIZE: 1333 38 | RPN_STRADDLE_THRESH: -1 # default 0 39 | TEST: 40 | DATASETS: ('coco_2014_minival',) 41 | SCALE: 800 42 | MAX_SIZE: 1333 43 | NMS: 0.5 44 | RPN_PRE_NMS_TOP_N: 10000 # Per FPN level 45 | RPN_POST_NMS_TOP_N: 2000 46 | OUTPUT_DIR: . 
47 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: retinanet 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.01 10 | GAMMA: 0.1 11 | MAX_ITER: 180000 12 | STEPS: [0, 120000, 160000] 13 | FPN: 14 | FPN_ON: True 15 | MULTILEVEL_RPN: True 16 | RPN_MAX_LEVEL: 7 17 | RPN_MIN_LEVEL: 3 18 | COARSEST_STRIDE: 128 19 | EXTRA_CONV_LEVELS: True 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 64 24 | WIDTH_PER_GROUP: 4 25 | RETINANET: 26 | RETINANET_ON: True 27 | NUM_CONVS: 4 28 | ASPECT_RATIOS: (1.0, 2.0, 0.5) 29 | SCALES_PER_OCTAVE: 3 30 | ANCHOR_SCALE: 4 31 | LOSS_GAMMA: 2.0 32 | LOSS_ALPHA: 0.25 33 | TRAIN: 34 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 35 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 36 | SCALES: (800,) 37 | MAX_SIZE: 1333 38 | RPN_STRADDLE_THRESH: -1 # default 0 39 | TEST: 40 | DATASETS: ('coco_2014_minival',) 41 | SCALE: 800 42 | MAX_SIZE: 1333 43 | NMS: 0.5 44 | RPN_PRE_NMS_TOP_N: 10000 # Per FPN level 45 | RPN_POST_NMS_TOP_N: 2000 46 | OUTPUT_DIR: . 
47 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 31 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 32 | SCALES: (800,) 33 | MAX_SIZE: 1333 34 | IMS_PER_BATCH: 1 35 | BATCH_SIZE_PER_IM: 512 36 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 37 | TEST: 38 | DATASETS: ('coco_2014_minival',) 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | OUTPUT_DIR: . 
45 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 31 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 32 | SCALES: (800,) 33 | MAX_SIZE: 1333 34 | IMS_PER_BATCH: 1 35 | BATCH_SIZE_PER_IM: 512 36 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 37 | TEST: 38 | DATASETS: ('coco_2014_minival',) 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | OUTPUT_DIR: . 
45 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 31 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 32 | SCALES: (800,) 33 | MAX_SIZE: 1333 34 | IMS_PER_BATCH: 1 35 | BATCH_SIZE_PER_IM: 512 36 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 37 | TEST: 38 | DATASETS: ('coco_2014_minival',) 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 43 | RPN_POST_NMS_TOP_N: 1000 44 | OUTPUT_DIR: . 45 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Portions of this software are derived from py-faster-rcnn. 
2 | 3 | ============================================================================== 4 | py-faster-rcnn licence 5 | ============================================================================== 6 | 7 | Faster R-CNN 8 | 9 | The MIT License (MIT) 10 | 11 | Copyright (c) 2015 Microsoft Corporation 12 | 13 | Permission is hereby granted, free of charge, to any person obtaining a copy 14 | of this software and associated documentation files (the "Software"), to deal 15 | in the Software without restriction, including without limitation the rights 16 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | copies of the Software, and to permit persons to whom the Software is 18 | furnished to do so, subject to the following conditions: 19 | 20 | The above copyright notice and this permission notice shall be included in 21 | all copies or substantial portions of the Software. 22 | 23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 | THE SOFTWARE. 
30 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | TRAIN: 30 | # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 31 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 32 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 33 | SCALES: (800,) 34 | MAX_SIZE: 1333 35 | IMS_PER_BATCH: 1 36 | BATCH_SIZE_PER_IM: 512 37 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 38 | TEST: 39 | DATASETS: ('coco_2014_minival',) 40 | SCALE: 800 41 | MAX_SIZE: 1333 42 | NMS: 0.5 43 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 44 | RPN_POST_NMS_TOP_N: 1000 45 | OUTPUT_DIR: . 
46 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | MRCNN: 25 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 26 | RESOLUTION: 28 # (output mask resolution) default 14 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 14 # default 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 30 | DILATION: 1 # default 2 31 | CONV_INIT: MSRAFill # default GaussianFill 32 | TRAIN: 33 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 34 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 35 | SCALES: (800,) 36 | MAX_SIZE: 1333 37 | BATCH_SIZE_PER_IM: 512 38 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 39 | TEST: 40 | DATASETS: ('coco_2014_minival',) 41 | SCALE: 800 42 | MAX_SIZE: 1333 43 | NMS: 0.5 44 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 45 | RPN_POST_NMS_TOP_N: 1000 46 | OUTPUT_DIR: . 
47 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | MRCNN: 25 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 26 | RESOLUTION: 28 # (output mask resolution) default 14 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 14 # default 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 30 | DILATION: 1 # default 2 31 | CONV_INIT: MSRAFill # default GaussianFill 32 | TRAIN: 33 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 34 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 35 | SCALES: (800,) 36 | MAX_SIZE: 1333 37 | BATCH_SIZE_PER_IM: 512 38 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 39 | TEST: 40 | DATASETS: ('coco_2014_minival',) 41 | SCALE: 800 42 | MAX_SIZE: 1333 43 | NMS: 0.5 44 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 45 | RPN_POST_NMS_TOP_N: 1000 46 | OUTPUT_DIR: . 
47 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | MRCNN: 25 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 26 | RESOLUTION: 28 # (output mask resolution) default 14 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 14 # default 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 30 | DILATION: 1 # default 2 31 | CONV_INIT: MSRAFill # default GaussianFill 32 | TRAIN: 33 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 34 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 35 | SCALES: (800,) 36 | MAX_SIZE: 1333 37 | BATCH_SIZE_PER_IM: 512 38 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 39 | TEST: 40 | DATASETS: ('coco_2014_minival',) 41 | SCALE: 800 42 | MAX_SIZE: 1333 43 | NMS: 0.5 44 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 45 | RPN_POST_NMS_TOP_N: 1000 46 | OUTPUT_DIR: . 
47 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | MRCNN: 25 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 26 | RESOLUTION: 28 # (output mask resolution) default 14 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 14 # default 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 30 | DILATION: 1 # default 2 31 | CONV_INIT: MSRAFill # default GaussianFill 32 | TRAIN: 33 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 34 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 35 | SCALES: (800,) 36 | MAX_SIZE: 1333 37 | BATCH_SIZE_PER_IM: 512 38 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 39 | TEST: 40 | DATASETS: ('coco_2014_minival',) 41 | SCALE: 800 42 | MAX_SIZE: 1333 43 | NMS: 0.5 44 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 45 | RPN_POST_NMS_TOP_N: 1000 46 | OUTPUT_DIR: . 
47 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/fast_rcnn_R-50-C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.add_ResNet50_conv4_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.01 10 | GAMMA: 0.1 11 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | RPN: 15 | SIZES: (32, 64, 128, 256, 512) 16 | FAST_RCNN: 17 | ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head 18 | ROI_XFORM_METHOD: RoIAlign 19 | TRAIN: 20 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 21 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 22 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl') 23 | SCALES: (800,) 24 | MAX_SIZE: 1333 25 | IMS_PER_BATCH: 1 26 | BATCH_SIZE_PER_IM: 512 27 | TEST: 28 | DATASETS: ('coco_2014_minival',) 29 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',) 30 | PROPOSAL_LIMIT: 1000 31 | SCALE: 800 32 | MAX_SIZE: 1333 33 | NMS: 0.5 34 | OUTPUT_DIR: . 
35 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/fast_rcnn_R-50-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.add_ResNet50_conv4_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.01 10 | GAMMA: 0.1 11 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 12 | MAX_ITER: 360000 13 | STEPS: [0, 240000, 320000] 14 | RPN: 15 | SIZES: (32, 64, 128, 256, 512) 16 | FAST_RCNN: 17 | ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head 18 | ROI_XFORM_METHOD: RoIAlign 19 | TRAIN: 20 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 21 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 22 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl') 23 | SCALES: (800,) 24 | MAX_SIZE: 1333 25 | IMS_PER_BATCH: 1 26 | BATCH_SIZE_PER_IM: 512 27 | TEST: 28 | DATASETS: ('coco_2014_minival',) 29 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',) 30 | PROPOSAL_LIMIT: 1000 31 | SCALE: 800 32 | MAX_SIZE: 1333 33 | NMS: 0.5 34 | OUTPUT_DIR: . 
35 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 4 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 15000 13 | STEPS: [0, 7500, 10000] 14 | # Equivalent schedules with... 15 | # 1 GPU: 16 | # BASE_LR: 0.0025 17 | # MAX_ITER: 60000 18 | # STEPS: [0, 30000, 40000] 19 | # 2 GPUs: 20 | # BASE_LR: 0.005 21 | # MAX_ITER: 30000 22 | # STEPS: [0, 15000, 20000] 23 | # 4 GPUs: 24 | # BASE_LR: 0.01 25 | # MAX_ITER: 15000 26 | # STEPS: [0, 7500, 10000] 27 | # 8 GPUs: 28 | # BASE_LR: 0.02 29 | # MAX_ITER: 7500 30 | # STEPS: [0, 3750, 5000] 31 | FPN: 32 | FPN_ON: True 33 | MULTILEVEL_ROIS: True 34 | MULTILEVEL_RPN: True 35 | FAST_RCNN: 36 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 37 | ROI_XFORM_METHOD: RoIAlign 38 | ROI_XFORM_RESOLUTION: 7 39 | ROI_XFORM_SAMPLING_RATIO: 2 40 | TRAIN: 41 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 42 | DATASETS: ('coco_2014_train',) 43 | SCALES: (500,) 44 | MAX_SIZE: 833 45 | BATCH_SIZE_PER_IM: 256 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | DATASETS: ('coco_2014_minival',) 49 | SCALE: 500 50 | MAX_SIZE: 833 51 | NMS: 0.5 52 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 53 | RPN_POST_NMS_TOP_N: 1000 54 | OUTPUT_DIR: . 
55 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 7500 13 | STEPS: [0, 3750, 5000] 14 | # Equivalent schedules with... 15 | # 1 GPU: 16 | # BASE_LR: 0.0025 17 | # MAX_ITER: 60000 18 | # STEPS: [0, 30000, 40000] 19 | # 2 GPUs: 20 | # BASE_LR: 0.005 21 | # MAX_ITER: 30000 22 | # STEPS: [0, 15000, 20000] 23 | # 4 GPUs: 24 | # BASE_LR: 0.01 25 | # MAX_ITER: 15000 26 | # STEPS: [0, 7500, 10000] 27 | # 8 GPUs: 28 | # BASE_LR: 0.02 29 | # MAX_ITER: 7500 30 | # STEPS: [0, 3750, 5000] 31 | FPN: 32 | FPN_ON: True 33 | MULTILEVEL_ROIS: True 34 | MULTILEVEL_RPN: True 35 | FAST_RCNN: 36 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 37 | ROI_XFORM_METHOD: RoIAlign 38 | ROI_XFORM_RESOLUTION: 7 39 | ROI_XFORM_SAMPLING_RATIO: 2 40 | TRAIN: 41 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 42 | DATASETS: ('coco_2014_train',) 43 | SCALES: (500,) 44 | MAX_SIZE: 833 45 | BATCH_SIZE_PER_IM: 256 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | DATASETS: ('coco_2014_minival',) 49 | SCALE: 500 50 | MAX_SIZE: 833 51 | NMS: 0.5 52 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 53 | RPN_POST_NMS_TOP_N: 1000 54 | OUTPUT_DIR: . 
55 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 1 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.0025 11 | GAMMA: 0.1 12 | MAX_ITER: 60000 13 | STEPS: [0, 30000, 40000] 14 | # Equivalent schedules with... 15 | # 1 GPU: 16 | # BASE_LR: 0.0025 17 | # MAX_ITER: 60000 18 | # STEPS: [0, 30000, 40000] 19 | # 2 GPUs: 20 | # BASE_LR: 0.005 21 | # MAX_ITER: 30000 22 | # STEPS: [0, 15000, 20000] 23 | # 4 GPUs: 24 | # BASE_LR: 0.01 25 | # MAX_ITER: 15000 26 | # STEPS: [0, 7500, 10000] 27 | # 8 GPUs: 28 | # BASE_LR: 0.02 29 | # MAX_ITER: 7500 30 | # STEPS: [0, 3750, 5000] 31 | FPN: 32 | FPN_ON: True 33 | MULTILEVEL_ROIS: True 34 | MULTILEVEL_RPN: True 35 | FAST_RCNN: 36 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 37 | ROI_XFORM_METHOD: RoIAlign 38 | ROI_XFORM_RESOLUTION: 7 39 | ROI_XFORM_SAMPLING_RATIO: 2 40 | TRAIN: 41 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 42 | DATASETS: ('coco_2014_train',) 43 | SCALES: (500,) 44 | MAX_SIZE: 833 45 | BATCH_SIZE_PER_IM: 256 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | DATASETS: ('coco_2014_minival',) 49 | SCALE: 500 50 | MAX_SIZE: 833 51 | NMS: 0.5 52 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 53 | RPN_POST_NMS_TOP_N: 1000 54 | OUTPUT_DIR: . 
55 | -------------------------------------------------------------------------------- /configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | NUM_GPUS: 2 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.005 11 | GAMMA: 0.1 12 | MAX_ITER: 30000 13 | STEPS: [0, 15000, 20000] 14 | # Equivalent schedules with... 15 | # 1 GPU: 16 | # BASE_LR: 0.0025 17 | # MAX_ITER: 60000 18 | # STEPS: [0, 30000, 40000] 19 | # 2 GPUs: 20 | # BASE_LR: 0.005 21 | # MAX_ITER: 30000 22 | # STEPS: [0, 15000, 20000] 23 | # 4 GPUs: 24 | # BASE_LR: 0.01 25 | # MAX_ITER: 15000 26 | # STEPS: [0, 7500, 10000] 27 | # 8 GPUs: 28 | # BASE_LR: 0.02 29 | # MAX_ITER: 7500 30 | # STEPS: [0, 3750, 5000] 31 | FPN: 32 | FPN_ON: True 33 | MULTILEVEL_ROIS: True 34 | MULTILEVEL_RPN: True 35 | FAST_RCNN: 36 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 37 | ROI_XFORM_METHOD: RoIAlign 38 | ROI_XFORM_RESOLUTION: 7 39 | ROI_XFORM_SAMPLING_RATIO: 2 40 | TRAIN: 41 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 42 | DATASETS: ('coco_2014_train',) 43 | SCALES: (500,) 44 | MAX_SIZE: 833 45 | BATCH_SIZE_PER_IM: 256 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | DATASETS: ('coco_2014_minival',) 49 | SCALE: 500 50 | MAX_SIZE: 833 51 | NMS: 0.5 52 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 53 | RPN_POST_NMS_TOP_N: 1000 54 | OUTPUT_DIR: . 
55 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.02 10 | GAMMA: 0.1 11 | MAX_ITER: 90000 12 | STEPS: [0, 60000, 80000] 13 | FPN: 14 | FPN_ON: True 15 | MULTILEVEL_ROIS: True 16 | MULTILEVEL_RPN: True 17 | FAST_RCNN: 18 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 19 | ROI_XFORM_METHOD: RoIAlign 20 | ROI_XFORM_RESOLUTION: 7 21 | ROI_XFORM_SAMPLING_RATIO: 2 22 | TRAIN: 23 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 24 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 25 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 26 | SCALES: (800,) 27 | MAX_SIZE: 1333 28 | BATCH_SIZE_PER_IM: 512 29 | TEST: 30 | DATASETS: ('coco_2014_minival',) 31 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 32 | PROPOSAL_LIMIT: 1000 33 | SCALE: 800 34 | MAX_SIZE: 1333 35 | NMS: 0.5 36 | OUTPUT_DIR: . 
37 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/fast_rcnn_R-50-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.02 10 | GAMMA: 0.1 11 | MAX_ITER: 180000 12 | STEPS: [0, 120000, 160000] 13 | FPN: 14 | FPN_ON: True 15 | MULTILEVEL_ROIS: True 16 | MULTILEVEL_RPN: True 17 | FAST_RCNN: 18 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 19 | ROI_XFORM_METHOD: RoIAlign 20 | ROI_XFORM_RESOLUTION: 7 21 | ROI_XFORM_SAMPLING_RATIO: 2 22 | TRAIN: 23 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 24 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 25 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 26 | SCALES: (800,) 27 | MAX_SIZE: 1333 28 | BATCH_SIZE_PER_IM: 512 29 | TEST: 30 | DATASETS: ('coco_2014_minival',) 31 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 32 | PROPOSAL_LIMIT: 1000 33 | SCALE: 800 34 | MAX_SIZE: 1333 35 | NMS: 0.5 36 | OUTPUT_DIR: . 
37 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/fast_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.02 10 | GAMMA: 0.1 11 | MAX_ITER: 90000 12 | STEPS: [0, 60000, 80000] 13 | FPN: 14 | FPN_ON: True 15 | MULTILEVEL_ROIS: True 16 | MULTILEVEL_RPN: True 17 | FAST_RCNN: 18 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 19 | ROI_XFORM_METHOD: RoIAlign 20 | ROI_XFORM_RESOLUTION: 7 21 | ROI_XFORM_SAMPLING_RATIO: 2 22 | TRAIN: 23 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 24 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 25 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 26 | SCALES: (800,) 27 | MAX_SIZE: 1333 28 | BATCH_SIZE_PER_IM: 512 29 | TEST: 30 | DATASETS: ('coco_2014_minival',) 31 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 32 | PROPOSAL_LIMIT: 1000 33 | SCALE: 800 34 | MAX_SIZE: 1333 35 | NMS: 0.5 36 | OUTPUT_DIR: . 
37 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/fast_rcnn_R-101-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | BASE_LR: 0.02 10 | GAMMA: 0.1 11 | MAX_ITER: 180000 12 | STEPS: [0, 120000, 160000] 13 | FPN: 14 | FPN_ON: True 15 | MULTILEVEL_ROIS: True 16 | MULTILEVEL_RPN: True 17 | FAST_RCNN: 18 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 19 | ROI_XFORM_METHOD: RoIAlign 20 | ROI_XFORM_RESOLUTION: 7 21 | ROI_XFORM_SAMPLING_RATIO: 2 22 | TRAIN: 23 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 24 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 25 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 26 | SCALES: (800,) 27 | MAX_SIZE: 1333 28 | BATCH_SIZE_PER_IM: 512 29 | TEST: 30 | DATASETS: ('coco_2014_minival',) 31 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 32 | PROPOSAL_LIMIT: 1000 33 | SCALE: 800 34 | MAX_SIZE: 1333 35 | NMS: 0.5 36 | OUTPUT_DIR: . 37 | -------------------------------------------------------------------------------- /lib/ops/zero_even_op.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016-present, Facebook, Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef ZERO_EVEN_OP_H_ 18 | #define ZERO_EVEN_OP_H_ 19 | 20 | #include "caffe2/core/context.h" 21 | #include "caffe2/core/operator.h" 22 | 23 | namespace caffe2 { 24 | 25 | /** 26 | * ZeroEven operator. Zeros elements at even indices of a 1D array. 27 | * Elements at odd indices are preserved. 28 | * 29 | * This toy operator is an example of a custom operator and may be a useful 30 | * reference for adding new custom operators to the Detectron codebase. 31 | */ 32 | template 33 | class ZeroEvenOp final : public Operator { 34 | public: 35 | // Introduce Operator helper members. 36 | USE_OPERATOR_CONTEXT_FUNCTIONS; 37 | 38 | ZeroEvenOp(const OperatorDef& operator_def, Workspace* ws) 39 | : Operator(operator_def, ws) {} 40 | 41 | bool RunOnDevice() override; 42 | }; 43 | 44 | } // namespace caffe2 45 | 46 | #endif // ZERO_EVEN_OP_H_ 47 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Detectron 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Our Development Process 6 | Minor changes and improvements will be released on an ongoing basis. Larger 7 | changes (e.g., changesets implementing a new paper) will be released on a more 8 | periodic basis. 
9 | 10 | ## Pull Requests 11 | We actively welcome your pull requests. 12 | 13 | 1. Fork the repo and create your branch from `master`. 14 | 2. If you've added code that should be tested, add tests. 15 | 3. If you've changed APIs, update the documentation. 16 | 4. Ensure the test suite passes. 17 | 5. Make sure your code lints. 18 | 6. Ensure no regressions in baseline model speed and accuracy. 19 | 7. If you haven't already, complete the Contributor License Agreement ("CLA"). 20 | 21 | ## Contributor License Agreement ("CLA") 22 | In order to accept your pull request, we need you to submit a CLA. You only need 23 | to do this once to work on any of Facebook's open source projects. 24 | 25 | Complete your CLA here: <https://code.facebook.com/cla> 26 | 27 | ## Issues 28 | GitHub issues will be largely unattended and are mainly intended as a community 29 | forum for collectively debugging issues, hopefully leading to pull requests with 30 | fixes when appropriate. 31 | 32 | ## Coding Style 33 | * 4 spaces for indentation rather than tabs 34 | * 80 character line length 35 | * PEP8 formatting 36 | 37 | ## License 38 | By contributing to Detectron, you agree that your contributions will be licensed 39 | under the LICENSE file in the root directory of this source tree. 
40 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 
54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /lib/ops/zero_even_op.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "zero_even_op.h" 18 | 19 | namespace caffe2 { 20 | 21 | template <> 22 | bool ZeroEvenOp::RunOnDevice() { 23 | // Retrieve the input tensor. 24 | const auto& X = Input(0); 25 | CAFFE_ENFORCE(X.ndim() == 1); 26 | 27 | // Initialize the output tensor to a copy of the input tensor. 28 | auto* Y = Output(0); 29 | Y->CopyFrom(X); 30 | 31 | // Set output elements at even indices to zero. 
32 | auto* Y_data = Y->mutable_data(); 33 | for (auto i = 0; i < Y->size(); i += 2) { 34 | Y_data[i] = 0.0f; 35 | } 36 | 37 | return true; 38 | } 39 | 40 | REGISTER_CPU_OPERATOR(ZeroEven, ZeroEvenOp); 41 | 42 | OPERATOR_SCHEMA(ZeroEven) 43 | .NumInputs(1) 44 | .NumOutputs(1) 45 | .Input( 46 | 0, 47 | "X", 48 | "1D input tensor") 49 | .Output( 50 | 0, 51 | "Y", 52 | "1D output tensor"); 53 | 54 | } // namespace caffe2 55 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | FASTER_RCNN: True 6 | KEYPOINTS_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | KRCNN: 25 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 26 | NUM_STACKED_CONVS: 8 27 | NUM_KEYPOINTS: 17 28 | USE_DECONV_OUTPUT: True 29 | CONV_INIT: MSRAFill 30 | CONV_HEAD_DIM: 512 31 | UP_SCALE: 2 32 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 35 | ROI_XFORM_SAMPLING_RATIO: 2 36 | KEYPOINT_CONFIDENCE: bbox 37 | TRAIN: 38 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 39 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 40 | SCALES: (640, 672, 704, 736, 768, 800) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | 
DATASETS: ('keypoints_coco_2014_minival',) 46 | SCALE: 800 47 | MAX_SIZE: 1333 48 | NMS: 0.5 49 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 50 | RPN_POST_NMS_TOP_N: 1000 51 | OUTPUT_DIR: . 52 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 2 5 | FASTER_RCNN: True 6 | KEYPOINTS_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | KRCNN: 25 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 26 | NUM_STACKED_CONVS: 8 27 | NUM_KEYPOINTS: 17 28 | USE_DECONV_OUTPUT: True 29 | CONV_INIT: MSRAFill 30 | CONV_HEAD_DIM: 512 31 | UP_SCALE: 2 32 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 35 | ROI_XFORM_SAMPLING_RATIO: 2 36 | KEYPOINT_CONFIDENCE: bbox 37 | TRAIN: 38 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 39 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 40 | SCALES: (640, 672, 704, 736, 768, 800) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | DATASETS: ('keypoints_coco_2014_minival',) 46 | SCALE: 800 47 | MAX_SIZE: 1333 48 | NMS: 0.5 49 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 50 | RPN_POST_NMS_TOP_N: 1000 51 | OUTPUT_DIR: . 
52 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 2 5 | FASTER_RCNN: True 6 | KEYPOINTS_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 130000 14 | STEPS: [0, 100000, 120000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | KRCNN: 25 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 26 | NUM_STACKED_CONVS: 8 27 | NUM_KEYPOINTS: 17 28 | USE_DECONV_OUTPUT: True 29 | CONV_INIT: MSRAFill 30 | CONV_HEAD_DIM: 512 31 | UP_SCALE: 2 32 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 35 | ROI_XFORM_SAMPLING_RATIO: 2 36 | KEYPOINT_CONFIDENCE: bbox 37 | TRAIN: 38 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 39 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 40 | SCALES: (640, 672, 704, 736, 768, 800) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | DATASETS: ('keypoints_coco_2014_minival',) 46 | SCALE: 800 47 | MAX_SIZE: 1333 48 | NMS: 0.5 49 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 50 | RPN_POST_NMS_TOP_N: 1000 51 | OUTPUT_DIR: . 
52 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | FASTER_RCNN: True 6 | KEYPOINTS_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 130000 14 | STEPS: [0, 100000, 120000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | FAST_RCNN: 20 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 21 | ROI_XFORM_METHOD: RoIAlign 22 | ROI_XFORM_RESOLUTION: 7 23 | ROI_XFORM_SAMPLING_RATIO: 2 24 | KRCNN: 25 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 26 | NUM_STACKED_CONVS: 8 27 | NUM_KEYPOINTS: 17 28 | USE_DECONV_OUTPUT: True 29 | CONV_INIT: MSRAFill 30 | CONV_HEAD_DIM: 512 31 | UP_SCALE: 2 32 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 35 | ROI_XFORM_SAMPLING_RATIO: 2 36 | KEYPOINT_CONFIDENCE: bbox 37 | TRAIN: 38 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 39 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 40 | SCALES: (640, 672, 704, 736, 768, 800) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 44 | TEST: 45 | DATASETS: ('keypoints_coco_2014_minival',) 46 | SCALE: 800 47 | MAX_SIZE: 1333 48 | NMS: 0.5 49 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 50 | RPN_POST_NMS_TOP_N: 1000 51 | OUTPUT_DIR: . 
52 | -------------------------------------------------------------------------------- /lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Image helper functions.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | import cv2 24 | import numpy as np 25 | 26 | 27 | def aspect_ratio_rel(im, aspect_ratio): 28 | """Performs width-relative aspect ratio transformation.""" 29 | im_h, im_w = im.shape[:2] 30 | im_ar_w = int(round(aspect_ratio * im_w)) 31 | im_ar = cv2.resize(im, dsize=(im_ar_w, im_h)) 32 | return im_ar 33 | 34 | 35 | def aspect_ratio_abs(im, aspect_ratio): 36 | """Performs absolute aspect ratio transformation.""" 37 | im_h, im_w = im.shape[:2] 38 | im_area = im_h * im_w 39 | 40 | im_ar_w = np.sqrt(im_area * aspect_ratio) 41 | im_ar_h = np.sqrt(im_area / aspect_ratio) 42 | assert np.isclose(im_ar_w / im_ar_h, aspect_ratio) 43 | 44 | im_ar = cv2.resize(im, dsize=(int(im_ar_w), int(im_ar_h))) 45 | return im_ar 46 | -------------------------------------------------------------------------------- /lib/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR) 2 | 3 | # Add CMake modules. 4 | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules) 5 | 6 | # Add compiler flags. 7 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11") 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -O2 -fPIC -Wno-narrowing") 9 | 10 | # Include Caffe2 CMake utils. 11 | include(cmake/Utils.cmake) 12 | 13 | # Find dependencies. 14 | include(cmake/Dependencies.cmake) 15 | 16 | # Print configuration summary. 17 | include(cmake/Summary.cmake) 18 | detectron_print_config_summary() 19 | 20 | # Collect custom ops sources. 21 | file(GLOB CUSTOM_OPS_CPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/ops/*.cc) 22 | file(GLOB CUSTOM_OPS_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/ops/*.cu) 23 | 24 | # Install custom CPU ops lib. 25 | add_library( 26 | caffe2_detectron_custom_ops SHARED 27 | ${CUSTOM_OPS_CPU_SRCS}) 28 | 29 | target_include_directories( 30 | caffe2_detectron_custom_ops PRIVATE 31 | ${CAFFE2_INCLUDE_DIRS}) 32 | target_link_libraries(caffe2_detectron_custom_ops caffe2) 33 | install(TARGETS caffe2_detectron_custom_ops DESTINATION lib) 34 | 35 | # Install custom GPU ops lib. 
36 | if (HAVE_CUDA) 37 | # Additional -I prefix is required for CMake versions before commit (< 3.7): 38 | # https://github.com/Kitware/CMake/commit/7ded655f7ba82ea72a82d0555449f2df5ef38594 39 | list(APPEND CUDA_INCLUDE_DIRS -I${CAFFE2_INCLUDE_DIRS}) 40 | CUDA_ADD_LIBRARY( 41 | caffe2_detectron_custom_ops_gpu SHARED 42 | ${CUSTOM_OPS_CPU_SRCS} 43 | ${CUSTOM_OPS_GPU_SRCS}) 44 | 45 | target_link_libraries(caffe2_detectron_custom_ops_gpu caffe2_gpu) 46 | install(TARGETS caffe2_detectron_custom_ops_gpu DESTINATION lib) 47 | endif() 48 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 32 24 | WIDTH_PER_GROUP: 8 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | WEIGHTS: 
https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 40 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 41 | SCALES: (800,) 42 | MAX_SIZE: 1333 43 | IMS_PER_BATCH: 1 44 | BATCH_SIZE_PER_IM: 512 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | DATASETS: ('coco_2014_minival',) 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | OUTPUT_DIR: . 54 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | MAX_ITER: 360000 15 | STEPS: [0, 240000, 320000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 32 24 | WIDTH_PER_GROUP: 8 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 40 | DATASETS: ('coco_2014_train', 
'coco_2014_valminusminival') 41 | SCALES: (800,) 42 | MAX_SIZE: 1333 43 | IMS_PER_BATCH: 1 44 | BATCH_SIZE_PER_IM: 512 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | DATASETS: ('coco_2014_minival',) 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 52 | RPN_POST_NMS_TOP_N: 1000 53 | OUTPUT_DIR: . 54 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/mask_rcnn_R-50-C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.add_ResNet50_conv4_body 4 | NUM_CLASSES: 81 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | RPN: 16 | SIZES: (32, 64, 128, 256, 512) 17 | FAST_RCNN: 18 | ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head 19 | ROI_XFORM_METHOD: RoIAlign 20 | MRCNN: 21 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 22 | RESOLUTION: 14 23 | ROI_XFORM_METHOD: RoIAlign 24 | ROI_XFORM_RESOLUTION: 14 25 | DILATION: 1 # default 2 26 | CONV_INIT: MSRAFill # default: GaussianFill 27 | TRAIN: 28 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 29 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 30 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl') 31 | SCALES: (800,) 32 | MAX_SIZE: 1333 33 | IMS_PER_BATCH: 1 34 | BATCH_SIZE_PER_IM: 512 35 | TEST: 36 | DATASETS: ('coco_2014_minival',) 37 | PROPOSAL_FILES: 
('https://s3-us-west-2.amazonaws.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',) 38 | PROPOSAL_LIMIT: 1000 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | OUTPUT_DIR: . 43 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/mask_rcnn_R-50-C4_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: ResNet.add_ResNet50_conv4_body 4 | NUM_CLASSES: 81 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | RPN: 16 | SIZES: (32, 64, 128, 256, 512) 17 | FAST_RCNN: 18 | ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head 19 | ROI_XFORM_METHOD: RoIAlign 20 | MRCNN: 21 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare 22 | RESOLUTION: 14 23 | ROI_XFORM_METHOD: RoIAlign 24 | ROI_XFORM_RESOLUTION: 14 25 | DILATION: 1 # default 2 26 | CONV_INIT: MSRAFill # default: GaussianFill 27 | TRAIN: 28 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 29 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 30 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl') 31 | SCALES: (800,) 32 | MAX_SIZE: 1333 33 | IMS_PER_BATCH: 1 34 | BATCH_SIZE_PER_IM: 512 35 | TEST: 36 | DATASETS: ('coco_2014_minival',) 37 | PROPOSAL_FILES: 
('https://s3-us-west-2.amazonaws.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',) 38 | PROPOSAL_LIMIT: 1000 39 | SCALE: 800 40 | MAX_SIZE: 1333 41 | NMS: 0.5 42 | OUTPUT_DIR: . 43 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | ############################################################################## 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from Cython.Build import cythonize 21 | from setuptools import Extension 22 | from setuptools import setup 23 | 24 | import numpy as np 25 | 26 | _NP_INCLUDE_DIRS = np.get_include() 27 | 28 | 29 | # Extension modules 30 | ext_modules = [ 31 | Extension( 32 | name='utils.cython_bbox', 33 | sources=[ 34 | 'utils/cython_bbox.pyx' 35 | ], 36 | extra_compile_args=[ 37 | '-Wno-cpp' 38 | ], 39 | include_dirs=[ 40 | _NP_INCLUDE_DIRS 41 | ] 42 | ), 43 | Extension( 44 | name='utils.cython_nms', 45 | sources=[ 46 | 'utils/cython_nms.pyx' 47 | ], 48 | extra_compile_args=[ 49 | '-Wno-cpp' 50 | ], 51 | include_dirs=[ 52 | _NP_INCLUDE_DIRS 53 | ] 54 | ) 55 | ] 56 | 57 | setup( 58 | name='Detectron', 59 | ext_modules=cythonize(ext_modules) 60 | ) 61 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | MAX_ITER: 180000 15 | STEPS: [0, 120000, 160000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 64 24 | WIDTH_PER_GROUP: 4 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | 
MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 40 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 41 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 42 | SCALES: (800,) 43 | MAX_SIZE: 1333 44 | IMS_PER_BATCH: 1 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | DATASETS: ('coco_2014_minival',) 49 | SCALE: 800 50 | MAX_SIZE: 1333 51 | NMS: 0.5 52 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 53 | RPN_POST_NMS_TOP_N: 1000 54 | OUTPUT_DIR: . 55 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | MAX_ITER: 360000 15 | STEPS: [0, 240000, 320000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 64 24 | WIDTH_PER_GROUP: 4 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: 
mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 40 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 41 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 42 | SCALES: (800,) 43 | MAX_SIZE: 1333 44 | IMS_PER_BATCH: 1 45 | BATCH_SIZE_PER_IM: 512 46 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 47 | TEST: 48 | DATASETS: ('coco_2014_minival',) 49 | SCALE: 800 50 | MAX_SIZE: 1333 51 | NMS: 0.5 52 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 53 | RPN_POST_NMS_TOP_N: 1000 54 | OUTPUT_DIR: . 55 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | FASTER_RCNN: True 6 | KEYPOINTS_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | 
USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 44 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 45 | SCALES: (640, 672, 704, 736, 768, 800) 46 | MAX_SIZE: 1333 47 | BATCH_SIZE_PER_IM: 512 48 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 49 | TEST: 50 | DATASETS: ('keypoints_coco_2014_minival',) 51 | SCALE: 800 52 | MAX_SIZE: 1333 53 | NMS: 0.5 54 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 55 | RPN_POST_NMS_TOP_N: 1000 56 | OUTPUT_DIR: . 57 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | FASTER_RCNN: True 6 | KEYPOINTS_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 130000 14 | STEPS: [0, 100000, 120000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | 
CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 44 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 45 | SCALES: (640, 672, 704, 736, 768, 800) 46 | MAX_SIZE: 1333 47 | BATCH_SIZE_PER_IM: 512 48 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 49 | TEST: 50 | DATASETS: ('keypoints_coco_2014_minival',) 51 | SCALE: 800 52 | MAX_SIZE: 1333 53 | NMS: 0.5 54 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 55 | RPN_POST_NMS_TOP_N: 1000 56 | OUTPUT_DIR: . 57 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 20 | TRANS_FUNC: bottleneck_transformation 21 | NUM_GROUPS: 32 22 | WIDTH_PER_GROUP: 8 23 | FAST_RCNN: 24 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 7 27 | ROI_XFORM_SAMPLING_RATIO: 2 28 | TRAIN: 29 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 30 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 31 | PROPOSAL_FILES: 
('https://s3-us-west-2.amazonaws.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 32 | SCALES: (800,) 33 | MAX_SIZE: 1333 34 | IMS_PER_BATCH: 1 35 | BATCH_SIZE_PER_IM: 512 36 | TEST: 37 | DATASETS: ('coco_2014_minival',) 38 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 39 | PROPOSAL_LIMIT: 1000 40 | SCALE: 800 41 | MAX_SIZE: 1333 42 | NMS: 0.5 43 | OUTPUT_DIR: . 44 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 360000 13 | STEPS: [0, 240000, 320000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 20 | TRANS_FUNC: bottleneck_transformation 21 | NUM_GROUPS: 32 22 | WIDTH_PER_GROUP: 8 23 | FAST_RCNN: 24 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 7 27 | ROI_XFORM_SAMPLING_RATIO: 2 28 | TRAIN: 29 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 30 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 31 | 
PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 32 | SCALES: (800,) 33 | MAX_SIZE: 1333 34 | IMS_PER_BATCH: 1 35 | BATCH_SIZE_PER_IM: 512 36 | TEST: 37 | DATASETS: ('coco_2014_minival',) 38 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 39 | PROPOSAL_LIMIT: 1000 40 | SCALE: 800 41 | MAX_SIZE: 1333 42 | NMS: 0.5 43 | OUTPUT_DIR: . 44 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 20 | TRANS_FUNC: bottleneck_transformation 21 | NUM_GROUPS: 64 22 | WIDTH_PER_GROUP: 4 23 | FAST_RCNN: 24 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 7 27 | ROI_XFORM_SAMPLING_RATIO: 2 28 | TRAIN: 29 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 30 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 31 
| PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 32 | SCALES: (800,) 33 | MAX_SIZE: 1333 34 | IMS_PER_BATCH: 1 35 | BATCH_SIZE_PER_IM: 512 36 | TEST: 37 | DATASETS: ('coco_2014_minival',) 38 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 39 | PROPOSAL_LIMIT: 1000 40 | SCALE: 800 41 | MAX_SIZE: 1333 42 | NMS: 0.5 43 | OUTPUT_DIR: . 44 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | NUM_GPUS: 8 6 | SOLVER: 7 | WEIGHT_DECAY: 0.0001 8 | LR_POLICY: steps_with_decay 9 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 10 | BASE_LR: 0.01 11 | GAMMA: 0.1 12 | MAX_ITER: 360000 13 | STEPS: [0, 240000, 320000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 20 | TRANS_FUNC: bottleneck_transformation 21 | NUM_GROUPS: 64 22 | WIDTH_PER_GROUP: 4 23 | FAST_RCNN: 24 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 7 27 | ROI_XFORM_SAMPLING_RATIO: 2 28 | TRAIN: 29 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 30 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 
31 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 32 | SCALES: (800,) 33 | MAX_SIZE: 1333 34 | IMS_PER_BATCH: 1 35 | BATCH_SIZE_PER_IM: 512 36 | TEST: 37 | DATASETS: ('coco_2014_minival',) 38 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 39 | PROPOSAL_LIMIT: 1000 40 | SCALE: 800 41 | MAX_SIZE: 1333 42 | NMS: 0.5 43 | OUTPUT_DIR: . 44 | -------------------------------------------------------------------------------- /lib/cmake/Dependencies.cmake: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Dependencies.cmake 2 | 3 | # Find the Caffe2 package. 4 | # Caffe2 exports the required targets, so find_package should work for 5 | # the standard Caffe2 installation. If you encounter problems with finding 6 | # the Caffe2 package, make sure you have run `make install` when installing 7 | # Caffe2 (`make install` populates your share/cmake/Caffe2). 8 | find_package(Caffe2 REQUIRED) 9 | 10 | # Find CUDA. 11 | include(cmake/Cuda.cmake) 12 | if (HAVE_CUDA) 13 | # CUDA 9.x requires GCC version <= 6 14 | if ((CUDA_VERSION VERSION_EQUAL 9.0) OR 15 | (CUDA_VERSION VERSION_GREATER 9.0 AND CUDA_VERSION VERSION_LESS 10.0)) 16 | if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND 17 | NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 7.0 AND 18 | CUDA_HOST_COMPILER STREQUAL CMAKE_C_COMPILER) 19 | message(FATAL_ERROR 20 | "CUDA ${CUDA_VERSION} is not compatible with GCC version >= 7. 
" 21 | "Use the following option to use another version (for example): \n" 22 | " -DCUDA_HOST_COMPILER=/usr/bin/gcc-6\n") 23 | endif() 24 | # CUDA 8.0 requires GCC version <= 5 25 | elseif (CUDA_VERSION VERSION_EQUAL 8.0) 26 | if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND 27 | NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 6.0 AND 28 | CUDA_HOST_COMPILER STREQUAL CMAKE_C_COMPILER) 29 | message(FATAL_ERROR 30 | "CUDA 8.0 is not compatible with GCC version >= 6. " 31 | "Use the following option to use another version (for example): \n" 32 | " -DCUDA_HOST_COMPILER=/usr/bin/gcc-5\n") 33 | endif() 34 | endif() 35 | endif() 36 | 37 | # Find CUDNN. 38 | if (HAVE_CUDA) 39 | find_package(CuDNN REQUIRED) 40 | if (CUDNN_FOUND) 41 | caffe2_include_directories(${CUDNN_INCLUDE_DIRS}) 42 | endif() 43 | endif() 44 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | FASTER_RCNN: True 6 | KEYPOINTS_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 90000 14 | STEPS: [0, 60000, 80000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: MSRAFill 35 | CONV_HEAD_DIM: 512 36 | 
UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 44 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 45 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 46 | SCALES: (640, 672, 704, 736, 768, 800) 47 | MAX_SIZE: 1333 48 | BATCH_SIZE_PER_IM: 512 49 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 50 | TEST: 51 | DATASETS: ('keypoints_coco_2014_minival',) 52 | SCALE: 800 53 | MAX_SIZE: 1333 54 | NMS: 0.5 55 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 56 | RPN_POST_NMS_TOP_N: 1000 57 | OUTPUT_DIR: . 58 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | FASTER_RCNN: True 6 | KEYPOINTS_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | BASE_LR: 0.02 12 | GAMMA: 0.1 13 | MAX_ITER: 130000 14 | STEPS: [0, 100000, 120000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | KRCNN: 30 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 31 | NUM_STACKED_CONVS: 8 32 | NUM_KEYPOINTS: 17 33 | USE_DECONV_OUTPUT: True 34 | CONV_INIT: 
MSRAFill 35 | CONV_HEAD_DIM: 512 36 | UP_SCALE: 2 37 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 38 | ROI_XFORM_METHOD: RoIAlign 39 | ROI_XFORM_RESOLUTION: 14 40 | ROI_XFORM_SAMPLING_RATIO: 2 41 | KEYPOINT_CONFIDENCE: bbox 42 | TRAIN: 43 | # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 44 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 45 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 46 | SCALES: (640, 672, 704, 736, 768, 800) 47 | MAX_SIZE: 1333 48 | BATCH_SIZE_PER_IM: 512 49 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 50 | TEST: 51 | DATASETS: ('keypoints_coco_2014_minival',) 52 | SCALE: 800 53 | MAX_SIZE: 1333 54 | NMS: 0.5 55 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 56 | RPN_POST_NMS_TOP_N: 1000 57 | OUTPUT_DIR: . 58 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/mask_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | ROI_XFORM_RESOLUTION: 7 22 | ROI_XFORM_SAMPLING_RATIO: 2 23 | MRCNN: 24 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 25 | RESOLUTION: 28 # (output mask resolution) default 14 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 14 # default 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 29 | DILATION: 1 # default 2 30 | CONV_INIT: MSRAFill # default GaussianFill 31 | TRAIN: 32 | WEIGHTS: 
https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 33 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 34 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 35 | SCALES: (800,) 36 | MAX_SIZE: 1333 37 | BATCH_SIZE_PER_IM: 512 38 | TEST: 39 | DATASETS: ('coco_2014_minival',) 40 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 41 | PROPOSAL_LIMIT: 1000 42 | SCALE: 800 43 | MAX_SIZE: 1333 44 | NMS: 0.5 45 | OUTPUT_DIR: . 46 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/mask_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | ROI_XFORM_RESOLUTION: 7 22 | ROI_XFORM_SAMPLING_RATIO: 2 23 | MRCNN: 24 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 25 | RESOLUTION: 28 # (output mask resolution) default 14 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 14 # default 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 29 | DILATION: 1 # default 2 30 | 
CONV_INIT: MSRAFill # default GaussianFill 31 | TRAIN: 32 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 33 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 34 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 35 | SCALES: (800,) 36 | MAX_SIZE: 1333 37 | BATCH_SIZE_PER_IM: 512 38 | TEST: 39 | DATASETS: ('coco_2014_minival',) 40 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 41 | PROPOSAL_LIMIT: 1000 42 | SCALE: 800 43 | MAX_SIZE: 1333 44 | NMS: 0.5 45 | OUTPUT_DIR: . 
46 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/mask_rcnn_R-50-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 81 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | ROI_XFORM_RESOLUTION: 7 22 | ROI_XFORM_SAMPLING_RATIO: 2 23 | MRCNN: 24 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 25 | RESOLUTION: 28 # (output mask resolution) default 14 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 14 # default 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 29 | DILATION: 1 # default 2 30 | CONV_INIT: MSRAFill # default GaussianFill 31 | TRAIN: 32 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 33 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 34 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 35 | SCALES: (800,) 36 | MAX_SIZE: 1333 37 | BATCH_SIZE_PER_IM: 512 38 | TEST: 39 | DATASETS: ('coco_2014_minival',) 40 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 41 | PROPOSAL_LIMIT: 1000 42 | 
SCALE: 800 43 | MAX_SIZE: 1333 44 | NMS: 0.5 45 | OUTPUT_DIR: . 46 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/mask_rcnn_R-101-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 180000 13 | STEPS: [0, 120000, 160000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | ROI_XFORM_RESOLUTION: 7 22 | ROI_XFORM_SAMPLING_RATIO: 2 23 | MRCNN: 24 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 25 | RESOLUTION: 28 # (output mask resolution) default 14 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 14 # default 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 29 | DILATION: 1 # default 2 30 | CONV_INIT: MSRAFill # default GaussianFill 31 | TRAIN: 32 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 33 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 34 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 35 | SCALES: (800,) 36 | MAX_SIZE: 1333 37 | BATCH_SIZE_PER_IM: 512 38 | TEST: 39 | DATASETS: ('coco_2014_minival',) 40 | PROPOSAL_FILES: 
/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "caffe2/core/context_gpu.h"

#include "zero_even_op.h"

// NOTE(review): the template parameter lists, the explicit template arguments
// and the kernel launch configuration below were missing from the extracted
// text (every `<...>` span had been stripped). They are restored following
// the usual Caffe2 CUDA operator conventions -- confirm against
// zero_even_op.h before relying on them.

namespace caffe2 {

namespace {

/**
 * Stores `val` at indices 0, 2, 4, ... of `data`.
 *
 * @param num_even_inds  Number of even indices to write.
 * @param val            Value written at each even index.
 * @param data           Buffer to modify; index 2 * (num_even_inds - 1) must
 *                       be valid.
 */
template <typename T>
__global__ void SetEvenIndsToVal(size_t num_even_inds, T val, T* data) {
  CUDA_1D_KERNEL_LOOP(i, num_even_inds) {
    // i << 1 == 2 * i, i.e. the i-th even index.
    data[i << 1] = val;
  }
}

} // namespace

/**
 * Copies the 1-D input tensor to the output and zeroes every element at an
 * even index of the output.
 *
 * @return true on completion; a non-1-D input trips CAFFE_ENFORCE.
 */
template <>
bool ZeroEvenOp<float, CUDAContext>::RunOnDevice() {
  // Retrieve the input tensor.
  const auto& X = Input(0);
  CAFFE_ENFORCE(X.ndim() == 1);

  // Initialize the output tensor to a copy of the input tensor.
  auto* Y = Output(0);
  Y->CopyFrom(X);

  // Set output elements at even indices to zero.
  auto output_size = Y->size();

  // Skip the launch entirely for an empty tensor.
  if (output_size > 0) {
    // ceil(output_size / 2): an odd-length tensor has one extra even index.
    size_t num_even_inds = output_size / 2 + output_size % 2;
    SetEvenIndsToVal<float>
        <<<CAFFE_GET_BLOCKS(num_even_inds),
           CAFFE_CUDA_NUM_THREADS,
           0,
           context_.cuda_stream()>>>(
            num_even_inds,
            0.0f,
            Y->mutable_data<float>());
  }

  return true;
}

REGISTER_CUDA_OPERATOR(ZeroEven, ZeroEvenOp<float, CUDAContext>);

} // namespace caffe2
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Timing related functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import time


class Timer(object):
    """Stopwatch that accumulates elapsed wall-clock time over tic()/toc() pairs.

    Attributes are plain floats/ints that callers may read directly:
    total_time (sum of all measured intervals), calls (number of toc() calls),
    start_time (timestamp of the last tic()), diff (last measured interval)
    and average_time (total_time / calls).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every counter and timestamp back to zero."""
        self.start_time = 0.
        self.diff = 0.
        self.total_time = 0.
        self.calls = 0
        self.average_time = 0.

    def tic(self):
        """Mark the start of an interval."""
        # time.time is used rather than time.clock because time.clock does
        # not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """Close the interval opened by the last tic() and update the stats.

        Returns the running average over all toc() calls when `average` is
        true, otherwise the duration of just this interval.
        """
        elapsed = time.time() - self.start_time
        self.diff = elapsed
        self.total_time += elapsed
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else elapsed
61 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 2 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | ROI_XFORM_RESOLUTION: 7 22 | ROI_XFORM_SAMPLING_RATIO: 2 23 | KRCNN: 24 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 25 | NUM_STACKED_CONVS: 8 26 | NUM_KEYPOINTS: 17 27 | USE_DECONV_OUTPUT: True 28 | CONV_INIT: MSRAFill 29 | CONV_HEAD_DIM: 512 30 | UP_SCALE: 2 31 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 32 | ROI_XFORM_METHOD: RoIAlign 33 | ROI_XFORM_RESOLUTION: 14 34 | ROI_XFORM_SAMPLING_RATIO: 2 35 | KEYPOINT_CONFIDENCE: bbox 36 | TRAIN: 37 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 38 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 39 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 40 | SCALES: (640, 672, 704, 736, 768, 800) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | TEST: 44 | DATASETS: ('keypoints_coco_2014_minival',) 45 | PROPOSAL_FILES: 
('https://s3-us-west-2.amazonaws.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 46 | PROPOSAL_LIMIT: 1000 47 | SCALE: 800 48 | MAX_SIZE: 1333 49 | NMS: 0.5 50 | OUTPUT_DIR: . 51 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 2 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 130000 13 | STEPS: [0, 100000, 120000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | ROI_XFORM_RESOLUTION: 7 22 | ROI_XFORM_SAMPLING_RATIO: 2 23 | KRCNN: 24 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 25 | NUM_STACKED_CONVS: 8 26 | NUM_KEYPOINTS: 17 27 | USE_DECONV_OUTPUT: True 28 | CONV_INIT: MSRAFill 29 | CONV_HEAD_DIM: 512 30 | UP_SCALE: 2 31 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 32 | ROI_XFORM_METHOD: RoIAlign 33 | ROI_XFORM_RESOLUTION: 14 34 | ROI_XFORM_SAMPLING_RATIO: 2 35 | KEYPOINT_CONFIDENCE: bbox 36 | TRAIN: 37 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 38 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 39 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 
'https://s3-us-west-2.amazonaws.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 40 | SCALES: (640, 672, 704, 736, 768, 800) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | TEST: 44 | DATASETS: ('keypoints_coco_2014_minival',) 45 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 46 | PROPOSAL_LIMIT: 1000 47 | SCALE: 800 48 | MAX_SIZE: 1333 49 | NMS: 0.5 50 | OUTPUT_DIR: . 51 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | ROI_XFORM_RESOLUTION: 7 22 | ROI_XFORM_SAMPLING_RATIO: 2 23 | KRCNN: 24 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 25 | NUM_STACKED_CONVS: 8 26 | NUM_KEYPOINTS: 17 27 | USE_DECONV_OUTPUT: True 28 | CONV_INIT: MSRAFill 29 | CONV_HEAD_DIM: 512 30 | UP_SCALE: 2 31 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 32 | ROI_XFORM_METHOD: RoIAlign 33 | ROI_XFORM_RESOLUTION: 14 34 | ROI_XFORM_SAMPLING_RATIO: 2 35 | KEYPOINT_CONFIDENCE: bbox 36 | TRAIN: 37 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 38 | 
DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 39 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 40 | SCALES: (640, 672, 704, 736, 768, 800) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | TEST: 44 | DATASETS: ('keypoints_coco_2014_minival',) 45 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 46 | PROPOSAL_LIMIT: 1000 47 | SCALE: 800 48 | MAX_SIZE: 1333 49 | NMS: 0.5 50 | OUTPUT_DIR: . 51 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 130000 13 | STEPS: [0, 100000, 120000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | ROI_XFORM_RESOLUTION: 7 22 | ROI_XFORM_SAMPLING_RATIO: 2 23 | KRCNN: 24 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 25 | NUM_STACKED_CONVS: 8 26 | NUM_KEYPOINTS: 17 27 | USE_DECONV_OUTPUT: True 28 | CONV_INIT: MSRAFill 29 | CONV_HEAD_DIM: 512 30 | UP_SCALE: 2 31 | HEATMAP_SIZE: 
56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 32 | ROI_XFORM_METHOD: RoIAlign 33 | ROI_XFORM_RESOLUTION: 14 34 | ROI_XFORM_SAMPLING_RATIO: 2 35 | KEYPOINT_CONFIDENCE: bbox 36 | TRAIN: 37 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-101.pkl 38 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 39 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 40 | SCALES: (640, 672, 704, 736, 768, 800) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | TEST: 44 | DATASETS: ('keypoints_coco_2014_minival',) 45 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 46 | PROPOSAL_LIMIT: 1000 47 | SCALE: 800 48 | MAX_SIZE: 1333 49 | NMS: 0.5 50 | OUTPUT_DIR: . 51 | -------------------------------------------------------------------------------- /tools/convert_selective_search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | 3 | # Copyright (c) 2017-present, Facebook, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Script to convert Selective Search proposal boxes into the Detectron proposal
file format.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

try:
    import cPickle as pickle  # Python 2: C-accelerated pickle
except ImportError:
    import pickle  # Python 3: cPickle no longer exists as a separate module
import numpy as np
import scipy.io as sio
import sys

from datasets.json_dataset import JsonDataset


def convert(dataset_name, file_in, file_out):
    """Convert a Selective Search .mat file into a Detectron proposal pickle.

    Args:
        dataset_name: dataset name handed to JsonDataset to obtain the roidb.
        file_in: path of the .mat file; its 'boxes' entry must contain one
            box array per roidb entry, in roidb order.
        file_out: path of the pickle file to write. The pickle holds a dict
            with keys 'boxes', 'scores' (all zeros) and 'indexes' (roidb ids).

    Raises:
        ValueError: if the number of box arrays differs from the roidb length.
    """
    ds = JsonDataset(dataset_name)
    roidb = ds.get_roidb()
    raw_data = sio.loadmat(file_in)['boxes'].ravel()
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if raw_data.shape[0] != len(roidb):
        raise ValueError(
            'Proposal file has {} entries but the roidb has {}'.format(
                raw_data.shape[0], len(roidb)
            )
        )

    boxes = []
    scores = []
    ids = []
    for i, (raw_boxes, entry) in enumerate(zip(raw_data, roidb)):
        if i % 1000 == 0:
            print('{}/{}'.format(i + 1, len(roidb)))
        # selective search boxes are 1-indexed and (y1, x1, y2, x2)
        i_boxes = raw_boxes[:, (1, 0, 3, 2)] - 1
        boxes.append(i_boxes.astype(np.float32))
        # Selective Search provides no scores, so every box gets 0.
        scores.append(np.zeros((i_boxes.shape[0]), dtype=np.float32))
        ids.append(entry['id'])

    with open(file_out, 'wb') as f:
        pickle.dump(
            dict(boxes=boxes, scores=scores, indexes=ids), f,
            pickle.HIGHEST_PROTOCOL
        )


if __name__ == '__main__':
    # Extra trailing arguments are ignored, as before.
    if len(sys.argv) < 4:
        sys.exit(
            'usage: {} DATASET_NAME FILE_IN.mat FILE_OUT.pkl'.format(
                sys.argv[0]
            )
        )
    convert(sys.argv[1], sys.argv[2], sys.argv[3])
-------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | MRCNN: 30 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 31 | RESOLUTION: 28 # (output mask resolution) default 14 32 | ROI_XFORM_METHOD: RoIAlign 33 | ROI_XFORM_RESOLUTION: 14 # default 7 34 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 35 | DILATION: 1 # default 2 36 | CONV_INIT: MSRAFill # default GaussianFill 37 | TRAIN: 38 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 39 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 40 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 41 | SCALES: (800,) 42 | MAX_SIZE: 1333 43 | IMS_PER_BATCH: 1 44 | BATCH_SIZE_PER_IM: 512 45 | TEST: 46 | DATASETS: ('coco_2014_minival',) 47 | PROPOSAL_FILES: 
('https://s3-us-west-2.amazonaws.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 48 | PROPOSAL_LIMIT: 1000 49 | SCALE: 800 50 | MAX_SIZE: 1333 51 | NMS: 0.5 52 | OUTPUT_DIR: . 53 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 32 23 | WIDTH_PER_GROUP: 8 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | MRCNN: 30 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 31 | RESOLUTION: 28 # (output mask resolution) default 14 32 | ROI_XFORM_METHOD: RoIAlign 33 | ROI_XFORM_RESOLUTION: 14 # default 7 34 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 35 | DILATION: 1 # default 2 36 | CONV_INIT: MSRAFill # default GaussianFill 37 | TRAIN: 38 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 39 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 40 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 
'https://s3-us-west-2.amazonaws.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 41 | SCALES: (800,) 42 | MAX_SIZE: 1333 43 | IMS_PER_BATCH: 1 44 | BATCH_SIZE_PER_IM: 512 45 | TEST: 46 | DATASETS: ('coco_2014_minival',) 47 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 48 | PROPOSAL_LIMIT: 1000 49 | SCALE: 800 50 | MAX_SIZE: 1333 51 | NMS: 0.5 52 | OUTPUT_DIR: . 53 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 180000 14 | STEPS: [0, 120000, 160000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | MRCNN: 30 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 31 | RESOLUTION: 28 # (output mask resolution) default 14 32 | ROI_XFORM_METHOD: RoIAlign 33 | ROI_XFORM_RESOLUTION: 14 # default 7 34 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 35 | DILATION: 1 # default 2 36 | CONV_INIT: MSRAFill # default GaussianFill 37 | TRAIN: 38 | # md5sum of 
weights pkl file: aa14062280226e48f569ef1c7212e7c7 39 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 40 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 41 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 42 | SCALES: (800,) 43 | MAX_SIZE: 1333 44 | IMS_PER_BATCH: 1 45 | BATCH_SIZE_PER_IM: 512 46 | TEST: 47 | DATASETS: ('coco_2014_minival',) 48 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 49 | PROPOSAL_LIMIT: 1000 50 | SCALE: 800 51 | MAX_SIZE: 1333 52 | NMS: 0.5 53 | OUTPUT_DIR: . 
54 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 81 5 | MASK_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) 11 | BASE_LR: 0.01 12 | GAMMA: 0.1 13 | MAX_ITER: 360000 14 | STEPS: [0, 240000, 320000] 15 | FPN: 16 | FPN_ON: True 17 | MULTILEVEL_ROIS: True 18 | MULTILEVEL_RPN: True 19 | RESNETS: 20 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 21 | TRANS_FUNC: bottleneck_transformation 22 | NUM_GROUPS: 64 23 | WIDTH_PER_GROUP: 4 24 | FAST_RCNN: 25 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 26 | ROI_XFORM_METHOD: RoIAlign 27 | ROI_XFORM_RESOLUTION: 7 28 | ROI_XFORM_SAMPLING_RATIO: 2 29 | MRCNN: 30 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 31 | RESOLUTION: 28 # (output mask resolution) default 14 32 | ROI_XFORM_METHOD: RoIAlign 33 | ROI_XFORM_RESOLUTION: 14 # default 7 34 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 35 | DILATION: 1 # default 2 36 | CONV_INIT: MSRAFill # default GaussianFill 37 | TRAIN: 38 | # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 39 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 40 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 41 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 42 | SCALES: (800,) 
# Copied from https://github.com/caffe2/caffe2/blob/master/cmake/Modules/FindCuDNN.cmake

# - Try to find cuDNN
#
# The following variables are optionally searched for defaults
#  CUDNN_ROOT_DIR:            Base directory where all cuDNN components are found
#
# The following are set after configuration is done:
#  CUDNN_FOUND
#  CUDNN_INCLUDE_DIRS
#  CUDNN_LIBRARIES
#  CUDNN_VERSION              ("?" when the version macros cannot be parsed)
#
# NOTE: CUDNN_LIBRARY_DIRS was listed here historically but is never set by
# this module; use CUDNN_LIBRARIES instead.

include(FindPackageHandleStandardArgs)

set(CUDNN_ROOT_DIR "" CACHE PATH "Folder contains NVIDIA cuDNN")

find_path(CUDNN_INCLUDE_DIR cudnn.h
    HINTS ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR}
    PATH_SUFFIXES cuda/include include)

find_library(CUDNN_LIBRARY cudnn
    HINTS ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR}
    PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64)

find_package_handle_standard_args(
    CUDNN DEFAULT_MSG CUDNN_INCLUDE_DIR CUDNN_LIBRARY)

if(CUDNN_FOUND)
  # get cuDNN version
  # cuDNN 8 and later define CUDNN_MAJOR/MINOR/PATCHLEVEL in cudnn_version.h;
  # earlier releases define them directly in cudnn.h. Prefer the dedicated
  # header when it exists so the version is not reported as "?".
  if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version.h")
    file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version.h" CUDNN_HEADER_CONTENTS)
  else()
    file(READ "${CUDNN_INCLUDE_DIR}/cudnn.h" CUDNN_HEADER_CONTENTS)
  endif()
  string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
               CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}")
  string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
               CUDNN_VERSION_MAJOR "${CUDNN_VERSION_MAJOR}")
  string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
               CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}")
  string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1"
               CUDNN_VERSION_MINOR "${CUDNN_VERSION_MINOR}")
  string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)"
               CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}")
  string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
               CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}")
  # Assemble cuDNN version
  if(NOT CUDNN_VERSION_MAJOR)
    set(CUDNN_VERSION "?")
  else()
    set(CUDNN_VERSION "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}")
  endif()

  set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR})
  set(CUDNN_LIBRARIES ${CUDNN_LIBRARY})
  message(STATUS "Found cuDNN: v${CUDNN_VERSION}  (include: ${CUDNN_INCLUDE_DIR}, library: ${CUDNN_LIBRARY})")
  mark_as_advanced(CUDNN_ROOT_DIR CUDNN_LIBRARY CUDNN_INCLUDE_DIR)
endif()
class GenerateProposalLabelsOp(object):
    """Python op that labels RPN proposals and emits the Fast R-CNN blobs."""

    def forward(self, inputs, outputs):
        """See modeling.detector.GenerateProposalLabels for inputs/outputs
        documentation.
        """
        # The data loader code is reused at training time: the roidb entries
        # are filled in on the fly from the rois produced by RPN.
        rpn_rois = inputs[0].data
        entries = blob_utils.deserialize(inputs[1].data)
        # im_info: [[im_height, im_width, im_scale], ...]; only the scale
        # column is needed here.
        scales = inputs[2].data[:, 2]
        blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
        # Crowd proposals are deliberately *not* filtered (crowd_thresh=0) for
        # historical consistency with the original Faster R-CNN
        # implementation. This choice should be investigated in the future (it
        # likely does not matter).
        json_dataset.add_proposals(entries, rpn_rois, scales, crowd_thresh=0)
        blobs = dict((name, []) for name in blob_names)
        roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, scales, entries)
        # Output slot idx receives the blob whose name is at position idx.
        for idx, name in enumerate(blob_names):
            blob_utils.py_op_copy_blob(blobs[name], outputs[idx])
('https://s3-us-west-2.amazonaws.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 45 | SCALES: (640, 672, 704, 736, 768, 800) 46 | MAX_SIZE: 1333 47 | BATCH_SIZE_PER_IM: 512 48 | TEST: 49 | DATASETS: ('keypoints_coco_2014_minival',) 50 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 51 | PROPOSAL_LIMIT: 1000 52 | SCALE: 800 53 | MAX_SIZE: 1333 54 | NMS: 0.5 55 | OUTPUT_DIR: . 56 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 130000 13 | STEPS: [0, 100000, 120000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 20 | TRANS_FUNC: bottleneck_transformation 21 | NUM_GROUPS: 32 22 | WIDTH_PER_GROUP: 8 23 | FAST_RCNN: 24 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 7 27 | ROI_XFORM_SAMPLING_RATIO: 2 28 | KRCNN: 29 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 30 | NUM_STACKED_CONVS: 8 31 | NUM_KEYPOINTS: 17 32 | 
USE_DECONV_OUTPUT: True 33 | CONV_INIT: MSRAFill 34 | CONV_HEAD_DIM: 512 35 | UP_SCALE: 2 36 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 37 | ROI_XFORM_METHOD: RoIAlign 38 | ROI_XFORM_RESOLUTION: 14 39 | ROI_XFORM_SAMPLING_RATIO: 2 40 | KEYPOINT_CONFIDENCE: bbox 41 | TRAIN: 42 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl 43 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 44 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 45 | SCALES: (640, 672, 704, 736, 768, 800) 46 | MAX_SIZE: 1333 47 | BATCH_SIZE_PER_IM: 512 48 | TEST: 49 | DATASETS: ('keypoints_coco_2014_minival',) 50 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 51 | PROPOSAL_LIMIT: 1000 52 | SCALE: 800 53 | MAX_SIZE: 1333 54 | NMS: 0.5 55 | OUTPUT_DIR: . 56 | -------------------------------------------------------------------------------- /lib/utils/coordinator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
log = logging.getLogger(__name__)


class Coordinator(object):
    """Stop flag shared between workers, backed by a threading.Event."""

    def __init__(self):
        self._event = threading.Event()

    def request_stop(self):
        """Log at debug level and set the stop flag."""
        log.debug('Coordinator stopping')
        self._event.set()

    def should_stop(self):
        """Return True once a stop has been requested."""
        return self._event.is_set()

    def wait_for_stop(self):
        """Block until the stop flag is set; returns the Event.wait() result."""
        return self._event.wait()

    @contextlib.contextmanager
    def stop_on_exception(self):
        """Context manager that converts an exception into a stop request.

        The exception is swallowed; its traceback is printed only when no stop
        had been requested before it was caught.
        """
        try:
            yield
        except Exception:
            already_stopping = self.should_stop()
            if not already_stopping:
                traceback.print_exc()
            self.request_stop()


def coordinated_get(coordinator, queue):
    """Pop an element from queue, polling the coordinator once per second.

    Raises Exception if the coordinator is (or becomes) stopped before an
    element could be fetched.
    """
    while True:
        if coordinator.should_stop():
            raise Exception('Coordinator stopped during get()')
        try:
            element = queue.get(block=True, timeout=1.0)
        except Queue.Empty:
            # Nothing available yet; re-check the stop flag and retry.
            pass
        else:
            return element


def coordinated_put(coordinator, queue, element):
    """Push element onto queue, polling the coordinator once per second.

    Raises Exception if the coordinator is (or becomes) stopped before the
    element could be enqueued.
    """
    while True:
        if coordinator.should_stop():
            raise Exception('Coordinator stopped during put()')
        try:
            queue.put(element, block=True, timeout=1.0)
        except Queue.Full:
            # No free slot yet; re-check the stop flag and retry.
            pass
        else:
            return
74 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 20 | TRANS_FUNC: bottleneck_transformation 21 | NUM_GROUPS: 64 22 | WIDTH_PER_GROUP: 4 23 | FAST_RCNN: 24 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 7 27 | ROI_XFORM_SAMPLING_RATIO: 2 28 | KRCNN: 29 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 30 | NUM_STACKED_CONVS: 8 31 | NUM_KEYPOINTS: 17 32 | USE_DECONV_OUTPUT: True 33 | CONV_INIT: MSRAFill 34 | CONV_HEAD_DIM: 512 35 | UP_SCALE: 2 36 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 37 | ROI_XFORM_METHOD: RoIAlign 38 | ROI_XFORM_RESOLUTION: 14 39 | ROI_XFORM_SAMPLING_RATIO: 2 40 | KEYPOINT_CONFIDENCE: bbox 41 | TRAIN: 42 | # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 43 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 44 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 45 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 
'https://s3-us-west-2.amazonaws.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 46 | SCALES: (640, 672, 704, 736, 768, 800) 47 | MAX_SIZE: 1333 48 | BATCH_SIZE_PER_IM: 512 49 | TEST: 50 | DATASETS: ('keypoints_coco_2014_minival',) 51 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 52 | PROPOSAL_LIMIT: 1000 53 | SCALE: 800 54 | MAX_SIZE: 1333 55 | NMS: 0.5 56 | OUTPUT_DIR: . 57 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet101_conv5_body 4 | NUM_CLASSES: 2 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 130000 13 | STEPS: [0, 100000, 120000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True 18 | RESNETS: 19 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 20 | TRANS_FUNC: bottleneck_transformation 21 | NUM_GROUPS: 64 22 | WIDTH_PER_GROUP: 4 23 | FAST_RCNN: 24 | ROI_BOX_HEAD: head_builder.add_roi_2mlp_head 25 | ROI_XFORM_METHOD: RoIAlign 26 | ROI_XFORM_RESOLUTION: 7 27 | ROI_XFORM_SAMPLING_RATIO: 2 28 | KRCNN: 29 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 30 | NUM_STACKED_CONVS: 8 31 | NUM_KEYPOINTS: 17 32 | USE_DECONV_OUTPUT: True 33 | CONV_INIT: MSRAFill 34 | CONV_HEAD_DIM: 512 35 | UP_SCALE: 2 36 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 37 | ROI_XFORM_METHOD: RoIAlign 38 
| ROI_XFORM_RESOLUTION: 14 39 | ROI_XFORM_SAMPLING_RATIO: 2 40 | KEYPOINT_CONFIDENCE: bbox 41 | TRAIN: 42 | # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 43 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl 44 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 45 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 46 | SCALES: (640, 672, 704, 736, 768, 800) 47 | MAX_SIZE: 1333 48 | BATCH_SIZE_PER_IM: 512 49 | TEST: 50 | DATASETS: ('keypoints_coco_2014_minival',) 51 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 52 | PROPOSAL_LIMIT: 1000 53 | SCALE: 800 54 | MAX_SIZE: 1333 55 | NMS: 0.5 56 | OUTPUT_DIR: . 
57 | -------------------------------------------------------------------------------- /configs/12_2017_baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet152_conv5_body 4 | NUM_CLASSES: 81 5 | FASTER_RCNN: True 6 | MASK_ON: True 7 | NUM_GPUS: 8 8 | SOLVER: 9 | WEIGHT_DECAY: 0.0001 10 | LR_POLICY: steps_with_decay 11 | # 1.44x schedule (note TRAIN.IMS_PER_BATCH: 1) 12 | BASE_LR: 0.01 13 | GAMMA: 0.1 14 | MAX_ITER: 260000 15 | STEPS: [0, 200000, 240000] 16 | FPN: 17 | FPN_ON: True 18 | MULTILEVEL_ROIS: True 19 | MULTILEVEL_RPN: True 20 | RESNETS: 21 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 22 | TRANS_FUNC: bottleneck_transformation 23 | NUM_GROUPS: 32 24 | WIDTH_PER_GROUP: 8 25 | FAST_RCNN: 26 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 27 | ROI_XFORM_METHOD: RoIAlign 28 | ROI_XFORM_RESOLUTION: 7 29 | ROI_XFORM_SAMPLING_RATIO: 2 30 | MRCNN: 31 | ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs 32 | RESOLUTION: 28 # (output mask resolution) default 14 33 | ROI_XFORM_METHOD: RoIAlign 34 | ROI_XFORM_RESOLUTION: 14 # default 7 35 | ROI_XFORM_SAMPLING_RATIO: 2 # default 0 36 | DILATION: 1 # default 2 37 | CONV_INIT: MSRAFill # default GaussianFill 38 | TRAIN: 39 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl 40 | DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') 41 | SCALES: (640, 672, 704, 736, 768, 800) # Scale jitter 42 | MAX_SIZE: 1333 43 | IMS_PER_BATCH: 1 44 | BATCH_SIZE_PER_IM: 512 45 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 46 | TEST: 47 | DATASETS: ('coco_2014_minival',) 48 | SCALE: 800 49 | MAX_SIZE: 1333 50 | NMS: 0.5 51 | BBOX_VOTE: 52 | ENABLED: True 53 | VOTE_TH: 0.9 54 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 55 | RPN_POST_NMS_TOP_N: 1000 56 | BBOX_AUG: 57 | ENABLED: True 58 | 
SCORE_HEUR: UNION 59 | COORD_HEUR: UNION 60 | H_FLIP: True 61 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 62 | MAX_SIZE: 2000 63 | SCALE_H_FLIP: True 64 | SCALE_SIZE_DEP: False 65 | ASPECT_RATIOS: () 66 | ASPECT_RATIO_H_FLIP: False 67 | MASK_AUG: 68 | ENABLED: True 69 | HEUR: SOFT_AVG 70 | H_FLIP: True 71 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 72 | MAX_SIZE: 2000 73 | SCALE_H_FLIP: True 74 | SCALE_SIZE_DEP: False 75 | ASPECT_RATIOS: () 76 | ASPECT_RATIO_H_FLIP: False 77 | OUTPUT_DIR: . 78 | -------------------------------------------------------------------------------- /lib/datasets/dummy_datasets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | """Provide stub objects that can act as stand-in "dummy" datasets for simple use 16 | cases, like getting all classes in a dataset. This exists so that demos can be 17 | run without requiring users to download/install datasets first. 
def get_coco_dataset():
    """A dummy COCO dataset that includes only the 'classes' field.

    The field maps each integer index to its class name, with index 0 being
    '__background__'.
    """
    names = (
        '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
        'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
        'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
        'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
        'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
        'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
        'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
        'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush',
    )
    dataset = AttrDict()
    dataset.classes = dict(enumerate(names))
    return dataset
class AttrDict(dict):
    """dict whose keys can also be read and written as attributes.

    An immutability flag is kept in the instance __dict__ (not among the dict
    items); while it is set, every attribute assignment raises AttributeError.
    """

    # Name of the instance-__dict__ slot holding the immutability flag.
    IMMUTABLE = '__immutable__'

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__[AttrDict.IMMUTABLE] = False

    def __getattr__(self, name):
        # Instance attributes take precedence over dict keys.
        attrs = self.__dict__
        if name in attrs:
            return attrs[name]
        if name in self:
            return self[name]
        raise AttributeError(name)

    def __setattr__(self, name, value):
        attrs = self.__dict__
        if attrs[AttrDict.IMMUTABLE]:
            raise AttributeError(
                'Attempted to set "{}" to "{}", but AttrDict is immutable'.
                format(name, value)
            )
        # Existing instance attributes are updated in place; any other name is
        # stored as a dict item.
        target = attrs if name in attrs else self
        target[name] = value

    def immutable(self, is_immutable):
        """Set immutability to is_immutable and recursively apply the setting
        to all nested AttrDicts.
        """
        self.__dict__[AttrDict.IMMUTABLE] = is_immutable
        # Visit AttrDicts stored as instance attributes first, then those
        # stored as dict values.
        for container in (self.__dict__, self):
            for child in container.values():
                if isinstance(child, AttrDict):
                    child.immutable(is_immutable)

    def is_immutable(self):
        """Return the current immutability flag."""
        return self.__dict__[AttrDict.IMMUTABLE]
https://s3-us-west-2.amazonaws.com/detectron/35859007/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml.01_49_07.By8nQcCH/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl 47 | 48 | # -- Test time augmentation example -- # 49 | BBOX_AUG: 50 | ENABLED: True 51 | SCORE_HEUR: UNION # AVG NOTE: cannot use AVG for e2e model 52 | COORD_HEUR: UNION # AVG NOTE: cannot use AVG for e2e model 53 | H_FLIP: True 54 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 55 | MAX_SIZE: 2000 56 | SCALE_H_FLIP: True 57 | SCALE_SIZE_DEP: False 58 | AREA_TH_LO: 2500 # 50^2 59 | AREA_TH_HI: 32400 # 180^2 60 | ASPECT_RATIOS: () 61 | ASPECT_RATIO_H_FLIP: False 62 | MASK_AUG: 63 | ENABLED: True 64 | HEUR: SOFT_AVG 65 | H_FLIP: True 66 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 67 | MAX_SIZE: 2000 68 | SCALE_H_FLIP: True 69 | SCALE_SIZE_DEP: False 70 | AREA_TH: 32400 # 180^2 71 | ASPECT_RATIOS: () 72 | ASPECT_RATIO_H_FLIP: False 73 | BBOX_VOTE: 74 | ENABLED: True 75 | VOTE_TH: 0.9 76 | # -- Test time augmentation example -- # 77 | 78 | USE_NCCL: False 79 | OUTPUT_DIR: . 80 | -------------------------------------------------------------------------------- /lib/modeling/VGG_CNN_M_1024.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def add_VGG_CNN_M_1024_conv5_body(model):
    """Add the conv1..conv5 layers of VGG_CNN_M_1024 to model.

    Returns a 3-tuple: the result of the final Relu call, 512 and 1/16
    (presumably the output channel count and the spatial scale relative to
    the input -- confirm against the callers).
    """
    lrn_kwargs = dict(size=5, alpha=0.0005, beta=0.75, bias=2.)
    pool_kwargs = dict(kernel=3, pad=0, stride=2)

    model.Conv('data', 'conv1', 3, 96, 7, pad=0, stride=2)
    model.Relu('conv1', 'conv1')
    model.LRN('conv1', 'norm1', **lrn_kwargs)
    model.MaxPool('norm1', 'pool1', **pool_kwargs)
    # No updates at conv1 and below (norm1 and pool1 have no params,
    # so we can stop gradients before them, too)
    model.StopGradient('pool1', 'pool1')

    model.Conv('pool1', 'conv2', 96, 256, 5, pad=0, stride=2)
    model.Relu('conv2', 'conv2')
    model.LRN('conv2', 'norm2', **lrn_kwargs)
    model.MaxPool('norm2', 'pool2', **pool_kwargs)

    # conv3..conv5 share one shape: 3x3 kernel, pad 1, stride 1, 512 outputs,
    # each followed by an in-place Relu.
    blob_out = None
    for src, dst, channels_in in (
        ('pool2', 'conv3', 256),
        ('conv3', 'conv4', 512),
        ('conv4', 'conv5', 512),
    ):
        model.Conv(src, dst, channels_in, 512, 3, pad=1, stride=1)
        blob_out = model.Relu(dst, dst)
    return blob_out, 512, 1. / 16.
def add_VGG_CNN_M_1024_roi_fc_head(model, blob_in, dim_in, spatial_scale):
    """Add the RoI transform and the fc6/fc7 layers of VGG_CNN_M_1024.

    RoI features are extracted from blob_in at a fixed 6x6 resolution into
    'pool5', then passed through FC(4096) and FC(1024), each followed by an
    in-place Relu. Returns the result of the last Relu call and 1024.
    """
    roi_size = 6
    xform_kwargs = dict(
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=roi_size,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale,
    )
    model.RoIFeatureTransform(blob_in, 'pool5', **xform_kwargs)
    # fc6 input size is channels * roi_size * roi_size.
    model.FC('pool5', 'fc6', dim_in * roi_size * roi_size, 4096)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', 4096, 1024)
    head_out = model.Relu('fc7', 'fc7')
    return head_out, 1024
def log_json_stats(stats, sort_keys=True):
    """Print stats as a single 'json_stats: {...}' line on stdout."""
    print('json_stats: {:s}'.format(json.dumps(stats, sort_keys=sort_keys)))


class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size):
        # Only the most recent window_size values are kept in the deque.
        self.deque = deque(maxlen=window_size)
        # The full history, the running sum and the number of values added.
        self.series = []
        self.total = 0.0
        self.count = 0

    def AddValue(self, value):
        """Record value in the window, the full series and the running sum."""
        self.deque.append(value)
        self.series.append(value)
        self.count += 1
        self.total += value

    def GetMedianValue(self):
        """Return the median of the values currently in the window."""
        return np.median(self.deque)

    def GetAverageValue(self):
        """Return the mean of the values currently in the window."""
        return np.mean(self.deque)

    def GetGlobalAverageValue(self):
        """Return the mean of every value added so far.

        Raises ZeroDivisionError if no value has been added yet.
        """
        return self.total / self.count


def send_email(subject, body, to):
    """Send a plain-text email through the SMTP server on localhost.

    The message is sent from 'detectron' to the address to. The SMTP session
    is always terminated, even when sending fails, so the connection is not
    leaked.
    """
    mime = MIMEText(body)
    mime['Subject'] = subject
    mime['To'] = to
    s = smtplib.SMTP('localhost')
    try:
        s.sendmail('detectron', to, mime.as_string())
    finally:
        # try/finally rather than `with`: SMTP objects are not context
        # managers on every Python version this code base targets.
        s.quit()


def setup_logging(name):
    """Configure root logging to stdout at INFO level and return logger name."""
    FORMAT = '%(levelname)s %(filename)s:%(lineno)4d: %(message)s'
    # Manually clear root loggers to prevent any module that may have called
    # logging.basicConfig() from blocking our logging setup
    logging.root.handlers = []
    logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
    logger = logging.getLogger(name)
    return logger
iw, ih, box_area 48 | cdef DTYPE_t ua 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ua = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) + 70 | box_area - iw * ih 71 | ) 72 | overlaps[n, k] = iw * ih / ua 73 | return overlaps 74 | -------------------------------------------------------------------------------- /lib/modeling/name_compat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Handle mapping from old network building function names to new names. 17 | 18 | Flexible network configuration is achieved by specifying the function name that 19 | builds a network module (e.g., the name of the conv backbone or the mask roi 20 | head). However we may wish to change names over time without breaking previous 21 | config files. 
# Old builder name -> current builder name. Add an entry here whenever a
# network building function is renamed so that old config files keep working.
_RENAME = {
    # "ResNet_" was dropped from the mask head names because it wasn't relevant
    'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up4convs':
        'mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs',
    'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up':
        'mask_rcnn_heads.mask_rcnn_fcn_head_v1up',
    'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v0upshare':
        'mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare',
    'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v0up':
        'mask_rcnn_heads.mask_rcnn_fcn_head_v0up',
    # The head_builder module was removed in favor of the more specific
    # fast_rcnn_heads name
    'head_builder.add_roi_2mlp_head':
        'fast_rcnn_heads.add_roi_2mlp_head',
}


def get_new_name(func_name):
    """Translate a possibly outdated builder function name.

    Returns the current name registered in ``_RENAME`` for ``func_name``;
    names without an entry are returned unchanged.
    """
    return _RENAME.get(func_name, func_name)
# Default value of the CMake install prefix
_CMAKE_INSTALL_PREFIX = '/usr/local'


def get_runtime_dir():
    """Retrieve the path to the runtime directory (``sys.path[0]``)."""
    return sys.path[0]


def get_py_bin_ext():
    """Retrieve python binary extension."""
    return '.py'


def set_up_matplotlib():
    """Set matplotlib up."""
    import matplotlib
    # Use a non-interactive backend
    matplotlib.use('Agg')


def exit_on_error():
    """Exit from a detectron tool when there's an error (exit status 1)."""
    sys.exit(1)


def import_nccl_ops():
    """Import NCCL ops."""
    # There is no need to load NCCL ops since the
    # NCCL dependency is built into the Caffe2 gpu lib
    pass


def get_detectron_ops_lib():
    """Retrieve Detectron ops library.

    Looks for ``lib/libcaffe2_detectron_ops_gpu.so`` under the default CMake
    install prefix, ``sys.prefix``, ``sys.exec_prefix`` and every entry of
    ``sys.path``, in that order, and returns the first path that exists.

    Raises:
        AssertionError: if the library is not found under any prefix.
    """
    # Candidate prefixes for the detectron ops lib path
    prefixes = [_CMAKE_INSTALL_PREFIX, sys.prefix, sys.exec_prefix] + sys.path
    # Search for detectron ops lib
    for prefix in prefixes:
        ops_path = os.path.join(prefix, 'lib/libcaffe2_detectron_ops_gpu.so')
        if os.path.exists(ops_path):
            # TODO(ilijar): Switch to using a logger
            print('Found Detectron ops lib: {}'.format(ops_path))
            return ops_path
    # Raised explicitly (rather than via an `assert` statement) so that the
    # check is not compiled away under `python -O`; the exception type and
    # message are unchanged for callers.
    raise AssertionError(
        'Detectron ops lib not found; make sure that your Caffe2 '
        'version includes Detectron module'
    )


def get_custom_ops_lib():
    """Retrieve custom ops library.

    The library is expected at ``build/libcaffe2_detectron_custom_ops_gpu.so``
    inside the parent of the directory that contains this module.

    Raises:
        AssertionError: if the library file does not exist.
    """
    lib_dir, _utils = os.path.split(os.path.dirname(__file__))
    custom_ops_lib = os.path.join(
        lib_dir, 'build/libcaffe2_detectron_custom_ops_gpu.so')
    # Explicit raise so the check also runs under `python -O`
    if not os.path.exists(custom_ops_lib):
        raise AssertionError(
            'Custom ops lib not found at \'{}\''.format(custom_ops_lib)
        )
    return custom_ops_lib
class SmoothL1LossTest(unittest.TestCase):
    """Numerical gradient check of the ``SmoothL1Loss`` operator."""

    def test_forward_and_gradient(self):
        """Compare the operator's gradient with a numerical estimate.

        Inputs are random float32 arrays of shape (128, 4 * 21); the inside
        and outside weights have their negative entries set to zero.
        """
        shape = (128, 4 * 21)

        def _randn():
            return np.random.randn(*shape).astype(np.float32)

        Y = _randn()
        Y_hat = _randn()
        inside_weights = _randn()
        inside_weights[inside_weights < 0] = 0
        outside_weights = _randn()
        outside_weights[outside_weights < 0] = 0
        scale = np.random.random()
        beta = np.random.random()

        input_names = ['Y_hat', 'Y', 'inside_weights', 'outside_weights']
        op = core.CreateOperator(
            'SmoothL1Loss', input_names, ['loss'], scale=scale, beta=beta
        )

        # The check runs on CUDA device 0
        cuda_device = core.DeviceOption(caffe2_pb2.CUDA, 0)
        checker = gradient_checker.GradientChecker(
            stepsize=0.005, threshold=0.005, device_option=cuda_device
        )

        # Check the gradient w.r.t. input 0 (Y_hat) through output 0 (loss)
        inputs = [Y_hat, Y, inside_weights, outside_weights]
        res, grad, grad_estimated = checker.CheckSimple(op, inputs, 0, [0])

        self.assertTrue(
            grad.shape == grad_estimated.shape,
            'Fail check: grad.shape != grad_estimated.shape'
        )

        # To inspect the gradient and estimated gradient:
        # np.set_printoptions(precision=3, suppress=True)
        # print('grad:')
        # print(grad)
        # print('grad_estimated:')
        # print(grad_estimated)

        self.assertTrue(res)
workspace.RegisteredOperators() 79 | utils.logging.setup_logging(__name__) 80 | unittest.main() 81 | -------------------------------------------------------------------------------- /configs/test_time_aug/keypoint_rcnn_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: keypoint_rcnn 3 | CONV_BODY: FPN.add_fpn_ResNet50_conv5_body 4 | NUM_CLASSES: 2 5 | KEYPOINTS_ON: True 6 | NUM_GPUS: 8 7 | SOLVER: 8 | WEIGHT_DECAY: 0.0001 9 | LR_POLICY: steps_with_decay 10 | BASE_LR: 0.02 11 | GAMMA: 0.1 12 | MAX_ITER: 90000 13 | STEPS: [0, 60000, 80000] 14 | FPN: 15 | FPN_ON: True 16 | MULTILEVEL_ROIS: True 17 | MULTILEVEL_RPN: True # accidentally True; disable in the future 18 | FAST_RCNN: 19 | ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head 20 | ROI_XFORM_METHOD: RoIAlign 21 | ROI_XFORM_RESOLUTION: 7 22 | ROI_XFORM_SAMPLING_RATIO: 2 23 | KRCNN: 24 | ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX 25 | NUM_STACKED_CONVS: 8 26 | NUM_KEYPOINTS: 17 27 | USE_DECONV_OUTPUT: True 28 | CONV_INIT: MSRAFill 29 | CONV_HEAD_DIM: 512 30 | UP_SCALE: 2 31 | HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) 32 | ROI_XFORM_METHOD: RoIAlign 33 | ROI_XFORM_RESOLUTION: 14 34 | ROI_XFORM_SAMPLING_RATIO: 2 35 | KEYPOINT_CONFIDENCE: bbox 36 | TRAIN: 37 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/ImageNetPretrained/MSRA/R-50.pkl 38 | DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') 39 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://s3-us-west-2.amazonaws.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') 40 | SCALES: (640, 672, 704, 736, 768, 
800) 41 | MAX_SIZE: 1333 42 | BATCH_SIZE_PER_IM: 512 43 | TEST: 44 | DATASETS: ('keypoints_coco_2014_minival',) 45 | PROPOSAL_FILES: ('https://s3-us-west-2.amazonaws.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) 46 | PROPOSAL_LIMIT: 1000 47 | SCALE: 800 48 | MAX_SIZE: 1333 49 | NMS: 0.5 50 | WEIGHTS: https://s3-us-west-2.amazonaws.com/detectron/37651887/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml.20_01_40.FDjUQ7VX/output/train/keypoints_coco_2014_train:keypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl 51 | 52 | # -- Test time augmentation example -- # 53 | BBOX_AUG: 54 | ENABLED: True 55 | SCORE_HEUR: AVG 56 | COORD_HEUR: AVG 57 | H_FLIP: True 58 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 59 | MAX_SIZE: 2000 60 | SCALE_H_FLIP: True 61 | SCALE_SIZE_DEP: False 62 | AREA_TH_LO: 2500 # 50^2 63 | AREA_TH_HI: 32400 # 180^2 64 | KPS_AUG: 65 | ENABLED: True 66 | HEUR: HM_AVG 67 | H_FLIP: True 68 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 69 | MAX_SIZE: 2000 70 | SCALE_H_FLIP: True 71 | SCALE_SIZE_DEP: True 72 | AREA_TH: 22500 # 150^2 73 | ASPECT_RATIOS: () 74 | ASPECT_RATIO_H_FLIP: False 75 | # -- Test time augmentation example -- # 76 | 77 | OUTPUT_DIR: . 78 | -------------------------------------------------------------------------------- /lib/datasets/cityscapes/coco_to_cityscapes_id.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# Lookup tables keyed by Cityscapes category id (of our converted json):
#   0 background, 1 bicycle, 2 car, 3 person, 4 train, 5 truck,
#   6 motorcycle, 7 bus, 8 rider
# Values are COCO category ids, or -1 for a class that has no COCO source
# (see the per-table comments for how -1 is labelled). The tables are built
# once at import time instead of on every call.

_TO_COCO = {
    0: 0,  # ... background
    1: 2,  # bicycle
    2: 3,  # car
    3: 1,  # person
    4: 7,  # train
    5: 8,  # truck
    6: 4,  # motorcycle
    7: 6,  # bus
    8: -1,  # rider (-1 means rand init)
}

_TO_COCO_WITH_RIDER = {
    0: 0,  # ... background
    1: 2,  # bicycle
    2: 3,  # car
    3: 1,  # person
    4: 7,  # train
    5: 8,  # truck
    6: 4,  # motorcycle
    7: 6,  # bus
    8: 1,  # rider (mapped to the COCO "person" category)
}

_TO_COCO_WITHOUT_PERSON_RIDER = {
    0: 0,  # ... background
    1: 2,  # bicycle
    2: 3,  # car
    3: -1,  # person (ignore)
    4: 7,  # train
    5: 8,  # truck
    6: 4,  # motorcycle
    7: 6,  # bus
    8: -1,  # rider (ignore)
}

_TO_COCO_ALL_RANDOM = {
    0: -1,  # ... background
    1: -1,  # bicycle
    2: -1,  # car
    3: -1,  # person
    4: -1,  # train
    5: -1,  # truck
    6: -1,  # motorcycle
    7: -1,  # bus
    8: -1,  # rider
}


def cityscapes_to_coco(cityscapes_id):
    """Map a Cityscapes category id to its COCO id; rider maps to -1.

    Raises:
        KeyError: if ``cityscapes_id`` is not one of 0..8.
    """
    return _TO_COCO[cityscapes_id]


def cityscapes_to_coco_with_rider(cityscapes_id):
    """Same as ``cityscapes_to_coco`` but rider maps to COCO person (1).

    Raises:
        KeyError: if ``cityscapes_id`` is not one of 0..8.
    """
    return _TO_COCO_WITH_RIDER[cityscapes_id]


def cityscapes_to_coco_without_person_rider(cityscapes_id):
    """Same as ``cityscapes_to_coco`` but person and rider both map to -1.

    Raises:
        KeyError: if ``cityscapes_id`` is not one of 0..8.
    """
    return _TO_COCO_WITHOUT_PERSON_RIDER[cityscapes_id]


def cityscapes_to_coco_all_random(cityscapes_id):
    """Map every Cityscapes category id (background included) to -1.

    Raises:
        KeyError: if ``cityscapes_id`` is not one of 0..8.
    """
    return _TO_COCO_ALL_RANDOM[cityscapes_id]
def add_VGG16_conv5_body(model):
    """Add the VGG16 convolutional body (conv1_1 .. conv5_3) to ``model``.

    Every conv is 3x3 with pad 1 and stride 1 and is followed by an in-place
    ReLU. Stages 1-4 end with a 2x2, stride-2 max pool; a StopGradient op is
    applied in place to 'pool2'.

    Returns:
        A tuple ``(blob_out, 512, 1. / 16.)``: the result of the final ReLU
        on 'conv5_3', its channel count, and the spatial scale left after
        the four stride-2 poolings.
    """
    # (blob name, input channels, output channels) of each conv, per stage
    stages = (
        (('conv1_1', 3, 64), ('conv1_2', 64, 64)),
        (('conv2_1', 64, 128), ('conv2_2', 128, 128)),
        (('conv3_1', 128, 256), ('conv3_2', 256, 256), ('conv3_3', 256, 256)),
        (('conv4_1', 256, 512), ('conv4_2', 512, 512), ('conv4_3', 512, 512)),
        (('conv5_1', 512, 512), ('conv5_2', 512, 512), ('conv5_3', 512, 512)),
    )
    num_stages = len(stages)
    prev_blob = 'data'
    blob_out = None
    for stage_idx, convs in enumerate(stages, 1):
        for conv_name, channels_in, channels_out in convs:
            model.Conv(
                prev_blob, conv_name, channels_in, channels_out, 3,
                pad=1, stride=1
            )
            blob_out = model.Relu(conv_name, conv_name)
            prev_blob = conv_name
        if stage_idx == num_stages:
            # The last stage is not pooled
            break
        pool_name = 'pool{}'.format(stage_idx)
        model.MaxPool(prev_blob, pool_name, kernel=2, pad=0, stride=2)
        if stage_idx == 2:
            model.StopGradient(pool_name, pool_name)
        prev_blob = pool_name
    return blob_out, 512, 1. / 16.


def add_VGG16_roi_fc_head(model, blob_in, dim_in, spatial_scale):
    """Add the VGG16 RoI head: RoI feature transform followed by fc6 and fc7.

    RoI features are extracted from ``blob_in`` for the 'rois' blob at a fixed
    7x7 resolution into 'pool5'; the transform method and sampling ratio come
    from ``cfg.FAST_RCNN``. Two 4096-d fully connected layers with in-place
    ReLUs follow.

    Returns:
        A tuple ``(blob_out, 4096)``: the result of the ReLU on 'fc7' and its
        dimension.
    """
    roi_size = 7
    fc_dim = 4096
    model.RoIFeatureTransform(
        blob_in,
        'pool5',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=roi_size,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    model.FC('pool5', 'fc6', dim_in * roi_size * roi_size, fc_dim)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', fc_dim, fc_dim)
    fc7_out = model.Relu('fc7', 'fc7')
    return fc7_out, fc_dim
14 | 15 | **A:** Don't modify the code; use the [`CUDA_VISIBLE_DEVICES`](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) environment variable instead. 16 | 17 | #### Q: Detection on one image is really slow compared to the reported performance, why? 18 | 19 | **A:** Various algorithms and caches (e.g., from `cudnn`) take some time to warm up. Peak inference performance will not be reached until after a few images have been processed. 20 | 21 | Also potentially relevant: inference with Mask R-CNN on high-resolution images may be slow simply because substantial time is spent upsampling the predicted masks to the original image resolution (this has not been optimized). You can diagnose this issue if the `misc_mask` time reported by `tools/infer_simple.py` is high (e.g., much more than 20-90ms). The solution is to first resize your images such that the short side is around 600-800px (the exact choice does not matter) and then run inference on the resized image. 22 | 23 | 24 | #### Q: How do I implement a custom Caffe2 CPU or GPU operator for use in Detectron? 25 | 26 | **A:** Detectron uses a number of specialized Caffe2 operators that are distributed via the [Caffe2 Detectron module](https://github.com/caffe2/caffe2/tree/master/modules/detectron) as part of the core Caffe2 GitHub repository. If you'd like to implement a custom Caffe2 operator for your project, we have written a toy example illustrating how to add an operator under the Detectron source tree; please see [`lib/ops/zero_even_op.*`](lib/ops/) and [`tests/test_zero_even_op.py`](tests/test_zero_even_op.py). For more background on writing Caffe2 operators, please consult the [Caffe2 documentation](https://caffe2.ai/docs/custom-operators.html). 27 | 28 | #### Q: How do I use Detectron to train a model on a custom dataset? 
29 | 30 | **A:** If possible, we strongly recommend that you first convert the custom dataset annotation format to the [COCO API json format](http://cocodataset.org/#download). Then, add your dataset to the [dataset catalog](lib/datasets/dataset_catalog.py) so that Detectron can use it for training and inference. If your dataset cannot be converted to the COCO API json format, then it's likely that more significant code modifications will be required. If the dataset you're adding is popular, please consider making the converted annotations publicly available; If code modifications are required, please consider submitting a pull request. 31 | -------------------------------------------------------------------------------- /lib/datasets/data/README.md: -------------------------------------------------------------------------------- 1 | # Setting Up Datasets 2 | 3 | This directory contains symlinks to data locations. 4 | 5 | ## Creating Symlinks for COCO 6 | 7 | Symlink the COCO dataset: 8 | 9 | ``` 10 | ln -s /path/to/coco $DETECTRON/lib/datasets/data/coco 11 | ``` 12 | 13 | We assume that your local COCO dataset copy at `/path/to/coco` has the following directory structure: 14 | 15 | ``` 16 | coco 17 | |_ coco_train2014 18 | | |_ .jpg 19 | | |_ ... 20 | | |_ .jpg 21 | |_ coco_val2014 22 | |_ ... 23 | |_ annotations 24 | |_ instances_train2014.json 25 | |_ ... 26 | ``` 27 | 28 | If that is not the case, you may need to do something similar to: 29 | 30 | ``` 31 | mkdir -p $DETECTRON/lib/datasets/data/coco 32 | ln -s /path/to/coco_train2014 $DETECTRON/lib/datasets/data/coco/ 33 | ln -s /path/to/coco_val2014 $DETECTRON/lib/datasets/data/coco/ 34 | ln -s /path/to/json/annotations $DETECTRON/lib/datasets/data/coco/annotations 35 | ``` 36 | 37 | ### COCO Minival Annotations 38 | 39 | Our custom `minival` and `valminusminival` annotations are available for download [here](https://s3-us-west-2.amazonaws.com/detectron/coco/coco_annotations_minival.tgz). 
40 | Please note that `minival` is exactly equivalent to the recently defined 2017 `val` set. 41 | Similarly, the union of `valminusminival` and the 2014 `train` is exactly equivalent to the 2017 `train` set. To complete installation of the COCO dataset, you will need to copy the `minival` and `valminusminival` json annotation files to the `coco/annotations` directory referenced above. 42 | 43 | ## Creating Symlinks for PASCAL VOC 44 | 45 | We assume that your symlinked `lib/datasets/data/VOC` directory has the following structure: 46 | 47 | ``` 48 | VOC 49 | |_ JPEGImages 50 | | |_ .jpg 51 | | |_ ... 52 | | |_ .jpg 53 | |_ annotations 54 | | |_ voc__trainval.json 55 | | |_ ... 56 | |_ VOCdevkit 57 | ``` 58 | 59 | Create symlinks for `VOC`: 60 | 61 | ``` 62 | mkdir -p $DETECTRON/lib/datasets/data/VOC 63 | ln -s /path/to/VOC/JPEGImages $DETECTRON/lib/datasets/data/VOC/JPEGImages 64 | ln -s /path/to/VOC/json/annotations $DETECTRON/lib/datasets/data/VOC/annotations 65 | ln -s /path/to/VOC/devkit $DETECTRON/lib/datasets/data/VOC/VOCdevkit 66 | ``` 67 | 68 | ### PASCAL VOC Annotations in COCO Format 69 | 70 | We expect PASCAL VOC annotations converted to COCO json format, which are available for download [here](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip ). 71 | 72 | ## Creating Symlinks for Cityscapes: 73 | 74 | We assume that your symlinked `lib/datasets/data/cityscapes` directory has the following structure: 75 | 76 | ``` 77 | cityscapes 78 | |_ images 79 | | |_ .jpg 80 | | |_ ... 81 | | |_ .jpg 82 | |_ annotations 83 | | |_ instanceonly_gtFile_train.json 84 | | |_ ... 85 | |_ raw 86 | |_ gtFine 87 | |_ ... 
88 | |_ README.md 89 | ``` 90 | 91 | Create symlinks for `cityscapes`: 92 | 93 | ``` 94 | mkdir -p $DETECTRON/lib/datasets/data/cityscapes 95 | ln -s /path/to/cityscapes/images $DETECTRON/lib/datasets/data/cityscapes/images 96 | ln -s /path/to/cityscapes/json/annotations $DETECTRON/lib/datasets/data/cityscapes/annotations 97 | ln -s /path/to/cityscapes/root $DETECTRON/lib/datasets/data/cityscapes/raw 98 | ``` 99 | 100 | ### Cityscapes Annotations in COCO Format 101 | 102 | We expect Cityscapes annotations converted to COCO json format, which we will make available for download soon. 103 | -------------------------------------------------------------------------------- /tools/generate_testdev_from_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | 3 | # Copyright (c) 2017-present, Facebook, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | ############################################################################## 17 | 18 | """Given a full set of results (boxes, masks, or keypoints) on the 2017 COCO 19 | test set, this script extracts the results subset that corresponds to 2017 20 | test-dev. The test-dev subset can then be submitted to the COCO evaluation 21 | server. 
def parse_args():
    """Parse the command line; print the help and exit(1) if no args given.

    Returns:
        The argparse namespace with ``json_file`` (default '') and
        ``output_dir`` (default '/tmp').
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--json', dest='json_file',
        help='detections json file',
        default='', type=str)
    parser.add_argument(
        '--output-dir', dest='output_dir',
        help='output directory',
        default='/tmp', type=str)
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()
    return args


def _load_image_ids(dataset_name):
    """Return the image ids listed in a dataset's annotation file."""
    ann_file = DATASETS[dataset_name][ANN_FN]
    with open(ann_file, 'r') as fid:
        info = json.load(fid)
    image_ids = [im['id'] for im in info['images']]
    print('{} has {} images'.format(ann_file, len(image_ids)))
    return image_ids


def convert(json_file, output_dir):
    """Write the test-dev subset of a COCO 2017 test results file.

    Loads the detections from ``json_file``, keeps (in their original order)
    the entries whose 'image_id' belongs to the 'coco_2017_test-dev' dataset,
    and dumps them to ``<output_dir>/<name>_test-dev<ext>``, where ``<name>``
    and ``<ext>`` are the base name and extension of ``json_file``.

    Args:
        json_file: path of the detections json file (a list of results).
        output_dir: directory the filtered file is written to.
    """
    print('Reading: {}'.format(json_file))
    with open(json_file, 'r') as fid:
        dt = json.load(fid)
    print('done!')

    # The full test set is loaded only to report its size
    _load_image_ids('coco_2017_test')
    # A set gives O(1) membership tests in the filtering loop below
    testdev_ids = set(_load_image_ids('coco_2017_test-dev'))

    dt_testdev = []
    print('Filtering test-dev from test...')
    t = Timer()
    t.tic()
    num_dt = len(dt)
    for i, det in enumerate(dt):
        if i % 1000 == 0:
            print('{}/{}'.format(i, num_dt))
        if det['image_id'] in testdev_ids:
            dt_testdev.append(det)
    print('Done filtering ({:2}s)!'.format(t.toc()))

    filename, file_extension = os.path.splitext(os.path.basename(json_file))
    filename = filename + '_test-dev'
    filename = os.path.join(output_dir, filename + file_extension)
    with open(filename, 'w') as fid:
        json.dump(dt_testdev, fid)
    print('Done writing: {}!'.format(filename))


if __name__ == '__main__':
    opts = parse_args()
    convert(opts.json_file, opts.output_dir)
14 | ############################################################################## 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | from __future__ import unicode_literals 20 | 21 | import numpy as np 22 | import unittest 23 | 24 | from caffe2.proto import caffe2_pb2 25 | from caffe2.python import core 26 | from caffe2.python import gradient_checker 27 | from caffe2.python import workspace 28 | 29 | import utils.c2 30 | import utils.logging 31 | 32 | 33 | class SpatialNarrowAsOpTest(unittest.TestCase): 34 | def _run_test(self, A, B, check_grad=False): 35 | with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)): 36 | op = core.CreateOperator('SpatialNarrowAs', ['A', 'B'], ['C']) 37 | workspace.FeedBlob('A', A) 38 | workspace.FeedBlob('B', B) 39 | workspace.RunOperatorOnce(op) 40 | C = workspace.FetchBlob('C') 41 | 42 | if check_grad: 43 | gc = gradient_checker.GradientChecker( 44 | stepsize=0.005, 45 | threshold=0.005, 46 | device_option=core.DeviceOption(caffe2_pb2.CUDA, 0) 47 | ) 48 | 49 | res, grad, grad_estimated = gc.CheckSimple(op, [A, B], 0, [0]) 50 | self.assertTrue(res, 'Grad check failed') 51 | 52 | dims = C.shape 53 | C_ref = A[:dims[0], :dims[1], :dims[2], :dims[3]] 54 | np.testing.assert_allclose(C, C_ref, rtol=1e-5, atol=1e-08) 55 | 56 | def test_small_forward_and_gradient(self): 57 | A = np.random.randn(2, 3, 5, 7).astype(np.float32) 58 | B = np.random.randn(2, 3, 2, 2).astype(np.float32) 59 | self._run_test(A, B, check_grad=True) 60 | 61 | A = np.random.randn(2, 3, 5, 7).astype(np.float32) 62 | B = np.random.randn(2, 3, 5).astype(np.float32) 63 | self._run_test(A, B, check_grad=True) 64 | 65 | def test_large_forward(self): 66 | A = np.random.randn(2, 256, 42, 100).astype(np.float32) 67 | B = np.random.randn(2, 256, 35, 87).astype(np.float32) 68 | self._run_test(A, B) 69 | 70 | A = np.random.randn(2, 256, 42, 87).astype(np.float32) 71 | B = np.random.randn(2, 
256, 35, 87).astype(np.float32) 72 | self._run_test(A, B) 73 | 74 | def test_size_exceptions(self): 75 | A = np.random.randn(2, 256, 42, 86).astype(np.float32) 76 | B = np.random.randn(2, 256, 35, 87).astype(np.float32) 77 | with self.assertRaises(RuntimeError): 78 | self._run_test(A, B) 79 | 80 | A = np.random.randn(2, 255, 42, 88).astype(np.float32) 81 | B = np.random.randn(2, 256, 35, 87).astype(np.float32) 82 | with self.assertRaises(RuntimeError): 83 | self._run_test(A, B) 84 | 85 | 86 | if __name__ == '__main__': 87 | workspace.GlobalInit(['caffe2', '--caffe2_log_level=0']) 88 | utils.c2.import_detectron_ops() 89 | assert 'SpatialNarrowAs' in workspace.RegisteredOperators() 90 | utils.logging.setup_logging(__name__) 91 | unittest.main() 92 | -------------------------------------------------------------------------------- /lib/modeling/rfcn_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    """Add R-FCN classification and box-regression outputs to ``model``.

    Builds, in order: an optional 1x1 conv + ReLU ('conv_dim_reduce'), the
    1x1 convs 'conv_cls' and 'conv_bbox_pred', position-sensitive RoI pooling
    of both over the 'rois' blob, and average pooling. The resulting blobs
    are 'cls_score' (plus 'cls_prob' when ``model.train`` is false) and
    'bbox_pred'.

    Args:
        model: model helper providing ``Conv``, ``Relu``, ``AveragePool``,
            ``Softmax``, ``net``, ``num_classes`` and ``train``.
        blob_in: input feature blob.
        dim_in: number of channels of ``blob_in``.
        dim_reduce: channel count for the optional reduction conv, or
            ``None`` to skip the reduction.
        spatial_scale: forwarded unchanged to both ``PSRoIPool`` ops.
    """
    grid_size = cfg.RFCN.PS_GRID_SIZE

    if dim_reduce is not None:
        # Optional dim reduction
        blob_in = model.Conv(
            blob_in,
            'conv_dim_reduce',
            dim_in,
            dim_reduce,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        blob_in = model.Relu(blob_in, blob_in)
        dim_in = dim_reduce
    # Classification conv
    model.Conv(
        blob_in,
        'conv_cls',
        dim_in,
        model.num_classes * grid_size**2,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    # Bounding-box regression conv
    num_bbox_reg_classes = (
        2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes
    )
    model.Conv(
        blob_in,
        'conv_bbox_pred',
        dim_in,
        4 * num_bbox_reg_classes * grid_size**2,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    # Classification PS RoI pooling
    model.net.PSRoIPool(
        ['conv_cls', 'rois'], ['psroipooled_cls', '_mapping_channel_cls'],
        group_size=grid_size,
        output_dim=model.num_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_cls', 'cls_score_4d', kernel=grid_size
    )
    # Flatten to one row of scores per RoI. The column count comes from
    # model.num_classes, the same value that sizes 'conv_cls' and the pooled
    # output above, so the reshape cannot disagree with the pooled layout.
    model.net.Reshape(
        'cls_score_4d', ['cls_score', '_cls_scores_shape'],
        shape=(-1, model.num_classes)
    )
    if not model.train:
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Bbox regression PS RoI pooling
    model.net.PSRoIPool(
        ['conv_bbox_pred', 'rois'],
        ['psroipooled_bbox', '_mapping_channel_bbox'],
        group_size=grid_size,
        output_dim=4 * num_bbox_reg_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_bbox', 'bbox_pred', kernel=grid_size
    )
def evaluate_masks(
    json_dataset,
    all_boxes,
    all_segms,
    output_dir,
    use_salt=True,
    cleanup=False
):
    """Dump instance masks to disk and run the Cityscapes evaluation script.

    For every roidb entry a ``<basename>pred.txt`` file is written to
    ``output_dir``; each of its lines holds a mask PNG path (relative, under
    ``results/``), the Cityscapes label id and the detection score. The
    Cityscapes instance-level evaluation is then invoked.

    Args:
        json_dataset: dataset object providing ``name``, ``classes`` and
            ``get_roidb()``.
        all_boxes: indexed as ``all_boxes[class][image]``; the last column of
            each entry is used as the score.
        all_segms: indexed as ``all_segms[class][image]``; each entry is
            decoded with ``pycocotools.mask.decode``. Class index 0 is
            skipped (presumably background -- confirm against the caller).
        output_dir: directory for the outputs; replaced by '/tmp' when
            ``cfg.CLUSTER.ON_CLUSTER`` is set.
        use_salt: unused by this function; kept so existing callers work.
        cleanup: unused by this function; kept so existing callers work.

    Returns:
        None.
    """
    if cfg.CLUSTER.ON_CLUSTER:
        # On the cluster avoid saving these files in the job directory
        output_dir = '/tmp'

    results_dir = os.path.join(output_dir, 'results')
    # Create the directory (and any missing parents). Another process may
    # create it concurrently, so only re-raise if it still does not exist.
    try:
        os.makedirs(results_dir)
    except OSError:
        if not os.path.isdir(results_dir):
            raise

    os.environ['CITYSCAPES_DATASET'] = DATASETS[json_dataset.name][RAW_DIR]
    os.environ['CITYSCAPES_RESULTS'] = output_dir

    # Load the Cityscapes eval script *after* setting the required env vars,
    # since the script reads their values into global variables (at load time).
    import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \
        as cityscapes_eval

    roidb = json_dataset.get_roidb()
    for i, entry in enumerate(roidb):
        im_name = entry['image']

        basename = os.path.splitext(os.path.basename(im_name))[0]
        txtname = os.path.join(output_dir, basename + 'pred.txt')
        with open(txtname, 'w') as fid_txt:
            if i % 10 == 0:
                logger.info('i: {}: {}'.format(i, basename))
            for j in range(1, len(all_segms)):
                clss = json_dataset.classes[j]
                clss_id = cityscapes_eval.name2label[clss].id
                segms = all_segms[j][i]
                boxes = all_boxes[j][i]
                if len(segms) == 0:
                    # No detections of this class in this image
                    continue
                masks = mask_util.decode(segms)

                for k in range(boxes.shape[0]):
                    score = boxes[k, -1]
                    mask = masks[:, :, k]
                    pngname = os.path.join(
                        'results',
                        basename + '_' + clss + '_{}.png'.format(k))
                    # write txt
                    fid_txt.write('{} {} {}\n'.format(pngname, clss_id, score))
                    # save mask
                    cv2.imwrite(os.path.join(output_dir, pngname), mask * 255)
    logger.info('Evaluating...')
    cityscapes_eval.main([])
    return None