├── .flake8 ├── .gitignore ├── ABSTRACTIONS.md ├── INSTALL.md ├── LICENSE ├── MODEL_ZOO.md ├── README.md ├── TROUBLESHOOTING.md ├── configs ├── caffe2 │ ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml │ ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml │ ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml │ ├── e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml │ ├── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml │ └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml ├── cityscapes │ ├── e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml ├── e2e_faster_rcnn_R_101_FPN_1x.yaml ├── e2e_faster_rcnn_R_50_C4_1x.yaml ├── e2e_faster_rcnn_R_50_FPN_1x.yaml ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml ├── e2e_faster_rcnn_fbnet.yaml ├── e2e_faster_rcnn_fbnet_600.yaml ├── e2e_faster_rcnn_fbnet_chamv1a_600.yaml ├── e2e_keypoint_rcnn_R_50_FPN_1x.yaml ├── e2e_mask_rcnn_R_101_FPN_1x.yaml ├── e2e_mask_rcnn_R_50_C4_1x.yaml ├── e2e_mask_rcnn_R_50_FPN_1x.yaml ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── e2e_mask_rcnn_fbnet.yaml ├── e2e_mask_rcnn_fbnet_600.yaml ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml ├── fad │ ├── augment │ │ ├── fad-fcos_imprv_R_101_FPN_2x.yaml │ │ ├── fad-fcos_imprv_R_50_FPN_1x.yaml │ │ └── fad-fcos_imprv_X_101_64x4d_FPN_2x.yaml │ └── search │ │ └── fad-fcos_imprv_R_50_FPN_1x.yaml ├── fcos │ ├── fcos_R_101_FPN_2x.yaml │ ├── fcos_R_50_FPN_1x.yaml │ ├── fcos_X_101_32x8d_FPN_2x.yaml │ ├── fcos_X_101_64x4d_FPN_2x.yaml │ ├── fcos_bn_bs16_MNV2_FPN_1x.yaml │ ├── fcos_imprv_R_101_FPN_1x.yaml │ ├── fcos_imprv_R_101_FPN_2x.yaml │ ├── fcos_imprv_R_50_FPN_1x.yaml │ ├── fcos_imprv_X_101_32x8d_FPN_2x.yaml │ ├── fcos_imprv_X_101_64x4d_FPN_2x.yaml │ ├── fcos_imprv_dcnv2_R_101_FPN_1x.yaml │ ├── fcos_imprv_dcnv2_R_101_FPN_2x.yaml │ ├── 
fcos_imprv_dcnv2_R_50_FPN_1x.yaml │ ├── fcos_imprv_dcnv2_X_101_32x8d_FPN_2x.yaml │ ├── fcos_imprv_dcnv2_X_101_64x4d_FPN_2x.yaml │ ├── fcos_syncbn_bs32_MNV2_FPN_1x.yaml │ ├── fcos_syncbn_bs32_c128_MNV2_FPN_1x.yaml │ ├── fcos_syncbn_bs32_c128_ms_MNV2_FPN_1x.yaml │ └── fcos_syncbn_bs64_c128_ms_MNV2_FPN_1x.yaml ├── gn_baselines │ ├── README.md │ ├── e2e_faster_rcnn_R_50_FPN_1x_gn.yaml │ ├── e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x_gn.yaml │ ├── e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ ├── scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml │ ├── scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml │ ├── scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml │ └── scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml ├── pascal_voc │ ├── e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml │ ├── e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml ├── quick_schedules │ ├── e2e_faster_rcnn_R_50_C4_quick.yaml │ ├── e2e_faster_rcnn_R_50_FPN_quick.yaml │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml │ ├── e2e_keypoint_rcnn_R_50_FPN_quick.yaml │ ├── e2e_mask_rcnn_R_50_C4_quick.yaml │ ├── e2e_mask_rcnn_R_50_FPN_quick.yaml │ ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml │ ├── rpn_R_50_C4_quick.yaml │ └── rpn_R_50_FPN_quick.yaml ├── retinanet │ ├── retinanet_R-101-FPN_1x.yaml │ ├── retinanet_R-101-FPN_P5_1x.yaml │ ├── retinanet_R-50-FPN_1x.yaml │ ├── retinanet_R-50-FPN_1x_quick.yaml │ ├── retinanet_R-50-FPN_P5_1x.yaml │ └── retinanet_X_101_32x8d_FPN_1x.yaml ├── rpn_R_101_FPN_1x.yaml ├── rpn_R_50_C4_1x.yaml ├── rpn_R_50_FPN_1x.yaml └── rpn_X_101_32x8d_FPN_1x.yaml ├── fad_core ├── architect.py ├── engine │ └── trainer.py ├── genotypes.py ├── modeling │ ├── modules │ │ ├── augment_cells.py │ │ ├── augment_rcnn.py │ │ ├── search_cells.py │ │ └── search_rcnn.py │ ├── ops.py │ ├── rpn │ │ ├── fcos │ │ │ ├── __init__.py │ │ │ └── fad_fcos.py │ │ └── retinanet │ │ │ └── fad_retinanet.py │ └── utils.py └── visualize.py ├── fcos_core ├── README.md 
├── __init__.py ├── config │ ├── __init__.py │ ├── defaults.py │ └── paths_catalog.py ├── csrc │ ├── ROIAlign.h │ ├── ROIPool.h │ ├── SigmoidFocalLoss.h │ ├── cpu │ │ ├── ROIAlign_cpu.cpp │ │ ├── nms_cpu.cpp │ │ └── vision.h │ ├── cuda │ │ ├── ROIAlign_cuda.cu │ │ ├── ROIPool_cuda.cu │ │ ├── SigmoidFocalLoss_cuda.cu │ │ ├── deform_conv_cuda.cu │ │ ├── deform_conv_kernel_cuda.cu │ │ ├── deform_pool_cuda.cu │ │ ├── deform_pool_kernel_cuda.cu │ │ ├── ml_nms.cu │ │ ├── nms.cu │ │ └── vision.h │ ├── deform_conv.h │ ├── deform_pool.h │ ├── ml_nms.h │ ├── nms.h │ └── vision.cpp ├── data │ ├── README.md │ ├── __init__.py │ ├── build.py │ ├── collate_batch.py │ ├── datasets │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── concat_dataset.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── coco │ │ │ │ ├── __init__.py │ │ │ │ └── coco_eval.py │ │ │ └── voc │ │ │ │ ├── __init__.py │ │ │ │ └── voc_eval.py │ │ ├── list_dataset.py │ │ └── voc.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── grouped_batch_sampler.py │ │ └── iteration_based_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── build.py │ │ └── transforms.py ├── engine │ ├── __init__.py │ ├── bbox_aug.py │ ├── inference.py │ └── trainer.py ├── layers │ ├── __init__.py │ ├── _utils.py │ ├── batch_norm.py │ ├── dcn │ │ ├── __init__.py │ │ ├── deform_conv_func.py │ │ ├── deform_conv_module.py │ │ ├── deform_pool_func.py │ │ └── deform_pool_module.py │ ├── iou_loss.py │ ├── iou_loss_search.py │ ├── misc.py │ ├── nms.py │ ├── roi_align.py │ ├── roi_pool.py │ ├── scale.py │ ├── sigmoid_focal_loss.py │ └── smooth_l1_loss.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── fbnet.py │ │ ├── fbnet_builder.py │ │ ├── fbnet_modeldef.py │ │ ├── fpn.py │ │ ├── mobilenet.py │ │ └── resnet.py │ ├── balanced_positive_negative_sampler.py │ ├── box_coder.py │ ├── detector │ │ ├── __init__.py │ │ ├── detectors.py │ │ └── generalized_rcnn.py │ ├── make_layers.py │ ├── 
matcher.py │ ├── poolers.py │ ├── registry.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── box_head │ │ │ ├── __init__.py │ │ │ ├── box_head.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── roi_box_feature_extractors.py │ │ │ └── roi_box_predictors.py │ │ ├── keypoint_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── keypoint_head.py │ │ │ ├── loss.py │ │ │ ├── roi_keypoint_feature_extractors.py │ │ │ └── roi_keypoint_predictors.py │ │ ├── mask_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── mask_head.py │ │ │ ├── roi_mask_feature_extractors.py │ │ │ └── roi_mask_predictors.py │ │ └── roi_heads.py │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── fcos │ │ │ ├── __init__.py │ │ │ ├── fcos.py │ │ │ ├── inference.py │ │ │ └── loss.py │ │ ├── inference.py │ │ ├── loss.py │ │ ├── retinanet │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ └── retinanet.py │ │ ├── rpn.py │ │ └── utils.py │ └── utils.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── bounding_box.py │ ├── boxlist_ops.py │ ├── image_list.py │ ├── keypoint.py │ └── segmentation_mask.py └── utils │ ├── README.md │ ├── __init__.py │ ├── c2_model_loading.py │ ├── checkpoint.py │ ├── collect_env.py │ ├── comm.py │ ├── cv2_util.py │ ├── env.py │ ├── imports.py │ ├── logger.py │ ├── metric_logger.py │ ├── miscellaneous.py │ ├── model_serialization.py │ ├── model_zoo.py │ ├── registry.py │ ├── tensorboard.py │ └── timer.py ├── requirements.txt ├── setup.py ├── tests ├── checkpoint.py ├── env_tests │ └── env.py ├── test_backbones.py ├── test_box_coder.py ├── test_configs.py ├── test_data_samplers.py ├── test_detectors.py ├── test_fbnet.py ├── test_feature_extractors.py ├── test_metric_logger.py ├── test_nms.py ├── test_predictors.py ├── test_rpn_heads.py ├── test_segmentation_mask.py └── utils.py └── tools ├── cityscapes ├── convert_cityscapes_to_coco.py └── 
instances2dict_with_polygons.py ├── remove_solver_states.py ├── search_net.py ├── test_net.py └── train_net.py /.flake8: -------------------------------------------------------------------------------- 1 | # This is an example .flake8 config, used when developing *Black* itself. 2 | # Keep in sync with setup.cfg which is used for source packages. 3 | 4 | [flake8] 5 | ignore = E203, E266, E501, W503 6 | max-line-length = 80 7 | max-complexity = 18 8 | select = B,C,E,F,W,T4,B9 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # compilation and distribution 2 | __pycache__ 3 | _ext 4 | *.pyc 5 | *.so 6 | maskrcnn_benchmark.egg-info/ 7 | build/ 8 | dist/ 9 | 10 | # pytorch/python/numpy formats 11 | *.pth 12 | *.pkl 13 | *.npy 14 | 15 | # ipython/jupyter notebooks 16 | *.ipynb 17 | **/.ipynb_checkpoints/ 18 | 19 | # Editor temporaries 20 | *.swn 21 | *.swo 22 | *.swp 23 | *~ 24 | 25 | # Pycharm editor settings 26 | .idea 27 | 28 | # project dirs 29 | /datasets 30 | /models 31 | /training_dir 32 | -------------------------------------------------------------------------------- /ABSTRACTIONS.md: -------------------------------------------------------------------------------- 1 | ## Abstractions 2 | The main abstractions introduced by `maskrcnn_benchmark` that are useful to 3 | have in mind are the following: 4 | 5 | ### ImageList 6 | In PyTorch, the first dimension of the input to the network generally represents 7 | the batch dimension, and thus all elements of the same batch have the same 8 | height / width. 9 | In order to support images with different sizes and aspect ratios in the same 10 | batch, we created the `ImageList` class, which holds internally a batch of 11 | images (of possibly different sizes). The images are padded with zeros such that 12 | they have the same final size and batched over the first dimension. 
The original 13 | sizes of the images before padding are stored in the `image_sizes` attribute, 14 | and the batched tensor in `tensors`. 15 | We provide a convenience function `to_image_list` that accepts a few different 16 | input types, including a list of tensors, and returns an `ImageList` object. 17 | 18 | ```python 19 | from fcos_core.structures.image_list import to_image_list 20 | 21 | images = [torch.rand(3, 100, 200), torch.rand(3, 150, 170)] 22 | batched_images = to_image_list(images) 23 | 24 | # it is also possible to make the final batched image be a multiple of a number 25 | batched_images_32 = to_image_list(images, size_divisible=32) 26 | ``` 27 | 28 | ### BoxList 29 | The `BoxList` class holds a set of bounding boxes (represented as a `Nx4` tensor) for 30 | a specific image, as well as the size of the image as a `(width, height)` tuple. 31 | It also contains a set of methods for performing geometric 32 | transformations on the bounding boxes (such as cropping, scaling and flipping). 33 | The class accepts bounding boxes from two different input formats: 34 | - `xyxy`, where each box is encoded as `x1`, `y1`, `x2` and `y2` coordinates, and 35 | - `xywh`, where each box is encoded as `x1`, `y1`, `w` and `h`. 36 | 37 | Additionally, each `BoxList` instance can also hold arbitrary additional information 38 | for each bounding box, such as labels, visibility, probability scores etc. 
39 | 40 | Here is an example of how to create a `BoxList` from a list of coordinates: 41 | ```python 42 | from fcos_core.structures.bounding_box import BoxList, FLIP_LEFT_RIGHT 43 | 44 | width = 100 45 | height = 200 46 | boxes = [ 47 | [0, 10, 50, 50], 48 | [50, 20, 90, 60], 49 | [10, 10, 50, 50] 50 | ] 51 | # create a BoxList with 3 boxes 52 | bbox = BoxList(boxes, image_size=(width, height), mode='xyxy') 53 | 54 | # perform some box transformations; the API is similar to PIL.Image 55 | bbox_scaled = bbox.resize((width * 2, height * 3)) 56 | bbox_flipped = bbox.transpose(FLIP_LEFT_RIGHT) 57 | 58 | # add labels for each bbox 59 | labels = torch.tensor([0, 10, 1]) 60 | bbox.add_field('labels', labels) 61 | 62 | # BoxList also supports a few operations, like indexing 63 | # here, we select boxes 0 and 2 64 | bbox_subset = bbox[[0, 2]] 65 | ``` 66 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements: 4 | - PyTorch >= 1.0. Installation instructions can be found at https://pytorch.org/get-started/locally/. 5 | - torchvision 6 | - cocoapi 7 | - yacs 8 | - matplotlib 9 | - GCC >= 4.9, < 6.0 10 | - (optional) OpenCV for the webcam demo 11 | 12 | ### Step-by-step installation 13 | 14 | ```bash 15 | conda create --name FAD 16 | conda activate FAD 17 | 18 | # this installs the right pip and dependencies for the fresh python 19 | conda install ipython 20 | 21 | # FCOS and coco api dependencies 22 | pip install ninja yacs cython matplotlib tqdm tensorboard graphviz 23 | 24 | # follow PyTorch installation in https://pytorch.org/get-started/locally/ 25 | # we give the instructions for CUDA 10.2 26 | conda install pytorch torchvision cudatoolkit=10.2 -c pytorch 27 | 28 | export INSTALL_DIR=$PWD 29 | 30 | # install pycocotools. Please make sure you have installed cython. 
31 | cd $INSTALL_DIR 32 | git clone https://github.com/cocodataset/cocoapi.git 33 | cd cocoapi/PythonAPI 34 | python setup.py build_ext install 35 | 36 | # install PyTorch Detection 37 | cd $INSTALL_DIR 38 | git clone https://github.com/MalongTech/research-fad.git 39 | cd research-fad 40 | 41 | # the following will install the lib with 42 | # symbolic links, so that you can modify 43 | # the files if you want and won't need to 44 | # re-build it 45 | python setup.py build develop --no-deps 46 | 47 | 48 | unset INSTALL_DIR 49 | 50 | # or if you are on macOS 51 | # MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop 52 | ``` 53 | 54 | 55 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x" 
4 | DATASETS: 5 | TEST: ("coco_2014_minival",) 6 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TEST: ("coco_2014_minival",) 25 | DATALOADER: 26 | SIZE_DIVISIBILITY: 32 27 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | DATASETS: 27 | 
TEST: ("coco_2014_minival",) 28 | DATALOADER: 29 | SIZE_DIVISIBILITY: 32 30 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/37697547/e2e_keypoint_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 2 24 | ROI_KEYPOINT_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor" 27 | PREDICTOR: "KeypointRCNNPredictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 56 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | KEYPOINT_ON: True 33 | DATASETS: 34 | TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",) 35 | TEST: ("keypoints_coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.02 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (60000, 80000) 44 | MAX_ITER: 90000 45 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x" 4 | 
BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x" 4 | ROI_MASK_HEAD: 5 | PREDICTOR: "MaskRCNNC4Predictor" 6 | SHARE_BOX_FEATURE_EXTRACTOR: True 7 | MASK_ON: True 8 | DATASETS: 9 | TEST: ("coco_2014_minival",) 10 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | 
POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/37129812/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x" 4 | BACKBONE: 5 | CONV_BODY: "R-152-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | TEST: 
("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | TEST: ("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /configs/cityscapes/e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | 
FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 9 24 | DATASETS: 25 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 26 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",) 27 | DATALOADER: 28 | SIZE_DIVISIBILITY: 32 29 | SOLVER: 30 | BASE_LR: 0.01 31 | WEIGHT_DECAY: 0.0001 32 | STEPS: (18000,) 33 | MAX_ITER: 24000 34 | -------------------------------------------------------------------------------- /configs/cityscapes/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 9 24 | ROI_MASK_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 27 | PREDICTOR: "MaskRCNNC4Predictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 28 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | MASK_ON: True 33 | DATASETS: 34 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",) 35 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",) 36 | DATALOADER: 37 | SIZE_DIVISIBILITY: 32 38 | SOLVER: 39 | BASE_LR: 0.01 40 | WEIGHT_DECAY: 0.0001 
41 | STEPS: (18000,) 42 | MAX_ITER: 24000 43 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 25 | TEST: ("coco_2014_minival",) 26 | DATALOADER: 27 | SIZE_DIVISIBILITY: 32 28 | SOLVER: 29 | BASE_LR: 0.02 30 | WEIGHT_DECAY: 0.0001 31 | STEPS: (60000, 80000) 32 | MAX_ITER: 90000 33 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | DATASETS: 8 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 9 | TEST: ("coco_2014_minival",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WEIGHT_DECAY: 0.0001 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | IMS_PER_BATCH: 8 16 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | 
META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 25 | TEST: ("coco_2014_minival",) 26 | DATALOADER: 27 | SIZE_DIVISIBILITY: 32 28 | SOLVER: 29 | BASE_LR: 0.02 30 | WEIGHT_DECAY: 0.0001 31 | STEPS: (60000, 80000) 32 | MAX_ITER: 90000 33 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RPN: 7 | USE_FPN: True 8 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 9 | PRE_NMS_TOP_N_TRAIN: 2000 10 | PRE_NMS_TOP_N_TEST: 1000 11 | POST_NMS_TOP_N_TEST: 1000 12 | FPN_POST_NMS_TOP_N_TEST: 1000 13 | ROI_HEADS: 14 | USE_FPN: True 15 | ROI_BOX_HEAD: 16 | POOLER_RESOLUTION: 7 17 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 18 | POOLER_SAMPLING_RATIO: 2 19 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 20 | PREDICTOR: "FPNPredictor" 21 | RESNETS: 22 | BACKBONE_OUT_CHANNELS: 256 23 | STRIDE_IN_1X1: False 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.01 33 | 
WEIGHT_DECAY: 0.0001 34 | STEPS: (120000, 160000) 35 | MAX_ITER: 180000 36 | IMS_PER_BATCH: 8 37 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_fbnet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 100 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 512 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.06 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (60000, 80000) 34 | MAX_ITER: 90000 35 | IMS_PER_BATCH: 128 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (320, ) 40 | MAX_SIZE_TRAIN: 640 41 | MIN_SIZE_TEST: 320 42 | MAX_SIZE_TEST: 640 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_fbnet_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 
| PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 200 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.06 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (60000, 80000) 34 | MAX_ITER: 90000 35 | IMS_PER_BATCH: 128 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (600, ) 40 | MAX_SIZE_TRAIN: 1000 41 | MIN_SIZE_TEST: 600 42 | MAX_SIZE_TEST: 1000 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "cham_v1a" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 200 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 128 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.045 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (90000, 120000) 34 | MAX_ITER: 135000 35 | IMS_PER_BATCH: 96 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (600, ) 40 | 
MAX_SIZE_TRAIN: 1000 41 | MIN_SIZE_TEST: 600 42 | MAX_SIZE_TEST: 1000 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /configs/e2e_keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 2 24 | ROI_KEYPOINT_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor" 27 | PREDICTOR: "KeypointRCNNPredictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 56 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | KEYPOINT_ON: True 33 | DATASETS: 34 | TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",) 35 | TEST: ("keypoints_coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.02 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (60000, 80000) 44 | MAX_ITER: 90000 45 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | 
BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 34 | TEST: ("coco_2014_minival",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_MASK_HEAD: 8 | PREDICTOR: "MaskRCNNC4Predictor" 9 | SHARE_BOX_FEATURE_EXTRACTOR: True 10 | MASK_ON: True 11 | DATASETS: 12 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 13 | TEST: ("coco_2014_minival",) 14 | SOLVER: 15 | BASE_LR: 0.01 16 | WEIGHT_DECAY: 0.0001 17 | STEPS: (120000, 160000) 18 | MAX_ITER: 180000 19 | IMS_PER_BATCH: 8 20 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_50_FPN_1x.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2017_train", ) 34 | TEST: ("coco_2017_val",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 2 43 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | 
POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 37 | TEST: ("coco_2014_minival",) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.01 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (120000, 160000) 44 | MAX_ITER: 180000 45 | IMS_PER_BATCH: 8 46 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 100 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 
0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (320, ) 48 | MAX_SIZE_TRAIN: 640 49 | MIN_SIZE_TEST: 320 50 | MAX_SIZE_TEST: 640 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 200 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (600, ) 48 | MAX_SIZE_TRAIN: 1000 49 | MIN_SIZE_TEST: 600 50 | MAX_SIZE_TEST: 1000 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | 
-------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "xirb16d_dsmask" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: -1.0 12 | RPN: 13 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 100 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 512 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (320, ) 48 | MAX_SIZE_TRAIN: 640 49 | MIN_SIZE_TEST: 320 50 | MAX_SIZE_TEST: 640 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "xirb16d_dsmask" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | 
DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 200 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (600, ) 48 | MAX_SIZE_TRAIN: 1000 49 | MIN_SIZE_TEST: 600 50 | MAX_SIZE_TEST: 1000 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/fad/augment/fad-fcos_imprv_R_101_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Malong LLC 2 | # All rights reserved. 3 | # 4 | # Contact: github@malongtech.com 5 | # 6 | # This source code is licensed under the LICENSE file in the root directory of this source tree. 
7 | 8 | MODEL: 9 | META_ARCHITECTURE: "GeneralizedRNAS" 10 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 11 | RPN_ONLY: True 12 | FAD_ON: True 13 | FCOS_ON: True 14 | RETINANET_ON: False 15 | BACKBONE: 16 | CONV_BODY: "R-101-FPN-RETINANET" 17 | RESNETS: 18 | BACKBONE_OUT_CHANNELS: 256 19 | RETINANET: 20 | USE_C5: False # FCOS uses P5 instead of C5 21 | FCOS: 22 | # normalizing the regression targets with FPN strides 23 | NORM_REG_TARGETS: True 24 | # positioning centerness on the regress branch. 25 | # Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042 26 | CENTERNESS_ON_REG: True 27 | # using center sampling and GIoU. 28 | # Please refer to https://github.com/yqyao/FCOS_PLUS 29 | CENTER_SAMPLING_RADIUS: 1.5 30 | IOU_LOSS_TYPE: "giou" 31 | FAD: 32 | SEARCH: False 33 | CLSTOWER: True 34 | NUM_CELLS_CLS: 2 35 | NUM_NODES_CLS: 3 36 | NUM_CHANNELS_CLS: 96 37 | BOXTOWER: True 38 | NUM_CELLS_BOX: 2 39 | NUM_NODES_BOX: 3 40 | NUM_CHANNELS_BOX: 96 41 | DATASETS: 42 | TRAIN: ("coco_2017_train",) 43 | TEST: ("coco_2017_val",) 44 | INPUT: 45 | MIN_SIZE_RANGE_TRAIN: (640, 800) 46 | MAX_SIZE_TRAIN: 1333 47 | MIN_SIZE_TEST: 800 48 | MAX_SIZE_TEST: 1333 49 | DATALOADER: 50 | SIZE_DIVISIBILITY: 32 51 | SOLVER: 52 | BASE_LR: 0.01 53 | WEIGHT_DECAY: 0.0001 54 | STEPS: (120000, 160000) 55 | MAX_ITER: 180000 56 | IMS_PER_BATCH: 16 57 | WARMUP_METHOD: "constant" 58 | -------------------------------------------------------------------------------- /configs/fad/augment/fad-fcos_imprv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Malong LLC 2 | # All rights reserved. 3 | # 4 | # Contact: github@malongtech.com 5 | # 6 | # This source code is licensed under the LICENSE file in the root directory of this source tree. 
7 | 8 | MODEL: 9 | META_ARCHITECTURE: "GeneralizedRNAS" 10 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 11 | RPN_ONLY: True 12 | FAD_ON: True 13 | FCOS_ON: True 14 | RETINANET_ON: False 15 | BACKBONE: 16 | CONV_BODY: "R-50-FPN-RETINANET" 17 | RESNETS: 18 | BACKBONE_OUT_CHANNELS: 256 19 | RETINANET: 20 | USE_C5: False # FCOS uses P5 instead of C5 21 | FCOS: 22 | # normalizing the regression targets with FPN strides 23 | NORM_REG_TARGETS: True 24 | CENTERNESS_ON_REG: True 25 | CENTER_SAMPLING_RADIUS: 1.5 26 | IOU_LOSS_TYPE: "giou" 27 | FAD: 28 | SEARCH: False 29 | CLSTOWER: True 30 | NUM_CELLS_CLS: 2 31 | NUM_NODES_CLS: 3 32 | NUM_CHANNELS_CLS: 96 33 | BOXTOWER: True 34 | NUM_CELLS_BOX: 2 35 | NUM_NODES_BOX: 3 36 | NUM_CHANNELS_BOX: 96 37 | DATASETS: 38 | TRAIN: ("coco_2017_train",) 39 | TEST: ("coco_2017_val",) 40 | INPUT: 41 | MIN_SIZE_TRAIN: (800,) 42 | MAX_SIZE_TRAIN: 1333 43 | MIN_SIZE_TEST: 800 44 | MAX_SIZE_TEST: 1333 45 | DATALOADER: 46 | SIZE_DIVISIBILITY: 32 47 | SOLVER: 48 | BASE_LR: 0.01 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (60000, 80000) 51 | MAX_ITER: 90000 52 | IMS_PER_BATCH: 16 53 | WARMUP_METHOD: "constant" 54 | -------------------------------------------------------------------------------- /configs/fad/augment/fad-fcos_imprv_X_101_64x4d_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Malong LLC 2 | # All rights reserved. 3 | # 4 | # Contact: github@malongtech.com 5 | # 6 | # This source code is licensed under the LICENSE file in the root directory of this source tree. 
7 | 8 | MODEL: 9 | META_ARCHITECTURE: "GeneralizedRNAS" 10 | WEIGHT: "https://cloudstor.aarnet.edu.au/plus/s/k3ys35075jmU1RP/download#X-101-64x4d.pkl" 11 | RPN_ONLY: True 12 | FAD_ON: True 13 | FCOS_ON: True 14 | RETINANET_ON: False 15 | BACKBONE: 16 | CONV_BODY: "R-101-FPN-RETINANET" 17 | RESNETS: 18 | STRIDE_IN_1X1: False 19 | BACKBONE_OUT_CHANNELS: 256 20 | NUM_GROUPS: 64 21 | WIDTH_PER_GROUP: 4 22 | RETINANET: 23 | USE_C5: False # FCOS uses P5 instead of C5 24 | FCOS: 25 | # normalizing the regression targets with FPN strides 26 | NORM_REG_TARGETS: True 27 | # positioning centerness on the regress branch. 28 | # Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042 29 | CENTERNESS_ON_REG: True 30 | # using center sampling and GIoU. 31 | # Please refer to https://github.com/yqyao/FCOS_PLUS 32 | CENTER_SAMPLING_RADIUS: 1.5 33 | IOU_LOSS_TYPE: "giou" 34 | FAD: 35 | SEARCH: False 36 | CLSTOWER: True 37 | NUM_CELLS_CLS: 2 38 | NUM_NODES_CLS: 3 39 | NUM_CHANNELS_CLS: 128 40 | BOXTOWER: True 41 | NUM_CELLS_BOX: 2 42 | NUM_NODES_BOX: 3 43 | NUM_CHANNELS_BOX: 128 44 | DATASETS: 45 | TRAIN: ("coco_2017_train",) 46 | TEST: ("coco_2017_val",) 47 | INPUT: 48 | MIN_SIZE_RANGE_TRAIN: (640, 800) 49 | MAX_SIZE_TRAIN: 1333 50 | MIN_SIZE_TEST: 800 51 | MAX_SIZE_TEST: 1333 52 | DATALOADER: 53 | SIZE_DIVISIBILITY: 32 54 | SOLVER: 55 | BASE_LR: 0.01 56 | WEIGHT_DECAY: 0.0001 57 | STEPS: (120000, 160000) 58 | MAX_ITER: 180000 59 | IMS_PER_BATCH: 16 60 | WARMUP_METHOD: "constant" 61 | -------------------------------------------------------------------------------- /configs/fad/search/fad-fcos_imprv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Malong LLC 2 | # All rights reserved. 3 | # 4 | # Contact: github@malongtech.com 5 | # 6 | # This source code is licensed under the LICENSE file in the root directory of this source tree. 
7 | 8 | MODEL: 9 | META_ARCHITECTURE: "GeneralizedRNAS" 10 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 11 | RPN_ONLY: True 12 | FAD_ON: True 13 | FCOS_ON: True 14 | BACKBONE: 15 | CONV_BODY: "R-50-FPN-RETINANET" 16 | FREEZE_CONV_BODY_AT: 2 17 | RESNETS: 18 | BACKBONE_OUT_CHANNELS: 256 19 | RETINANET: 20 | USE_C5: False # FCOS uses P5 instead of C5 21 | FCOS: 22 | # normalizing the regression targets with FPN strides 23 | NORM_REG_TARGETS: True 24 | CENTERNESS_ON_REG: True 25 | CENTER_SAMPLING_RADIUS: 1.5 26 | IOU_LOSS_TYPE: "giou" 27 | FAD: 28 | PLOT: True 29 | SEARCH: True 30 | CLSTOWER: True 31 | NUM_CELLS_CLS: 1 32 | NUM_NODES_CLS: 3 33 | NUM_CHANNELS_CLS: 96 34 | BOXTOWER: True 35 | NUM_CELLS_BOX: 1 36 | NUM_NODES_BOX: 3 37 | NUM_CHANNELS_BOX: 96 38 | DATASETS: 39 | TRAIN: ("voc_2012_train", "voc_2012_val") 40 | TEST: ("voc_2012_test",) 41 | 42 | INPUT: 43 | MIN_SIZE_TRAIN: (416,) 44 | MAX_SIZE_TRAIN: 693 45 | MIN_SIZE_TEST: 800 46 | MAX_SIZE_TEST: 1333 47 | DATALOADER: 48 | SIZE_DIVISIBILITY: 32 49 | SOLVER: 50 | BASE_LR: 0.004 51 | WEIGHT_DECAY: 0.0001 52 | STEPS: (12500, ) 53 | MAX_ITER: 17500 54 | CHECKPOINT_PERIOD: 2500 55 | IMS_PER_BATCH: 4 56 | -------------------------------------------------------------------------------- /configs/fcos/fcos_R_101_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RETINANET: 11 | USE_C5: False # FCOS uses P5 instead of C5 12 | DATASETS: 13 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 14 | TEST: ("coco_2014_minival",) 15 | INPUT: 16 | MIN_SIZE_RANGE_TRAIN: (640, 800) 17 | MAX_SIZE_TRAIN: 1333 18 | MIN_SIZE_TEST: 800 19 | MAX_SIZE_TEST: 1333 20 | DATALOADER: 21 | SIZE_DIVISIBILITY: 32 22 | SOLVER: 23 | BASE_LR: 0.01 24 | 
WEIGHT_DECAY: 0.0001 25 | STEPS: (120000, 160000) 26 | MAX_ITER: 180000 27 | IMS_PER_BATCH: 16 28 | WARMUP_METHOD: "constant" -------------------------------------------------------------------------------- /configs/fcos/fcos_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RETINANET: 11 | USE_C5: False # FCOS uses P5 instead of C5 12 | DATASETS: 13 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 14 | TEST: ("coco_2014_minival",) 15 | INPUT: 16 | MIN_SIZE_TRAIN: (800,) 17 | MAX_SIZE_TRAIN: 1333 18 | MIN_SIZE_TEST: 800 19 | MAX_SIZE_TEST: 1333 20 | DATALOADER: 21 | SIZE_DIVISIBILITY: 32 22 | SOLVER: 23 | BASE_LR: 0.01 24 | WEIGHT_DECAY: 0.0001 25 | STEPS: (60000, 80000) 26 | MAX_ITER: 90000 27 | IMS_PER_BATCH: 16 28 | WARMUP_METHOD: "constant" 29 | TEST: 30 | BBOX_AUG: 31 | ENABLED: False 32 | H_FLIP: True 33 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 34 | MAX_SIZE: 2000 35 | SCALE_H_FLIP: True 36 | -------------------------------------------------------------------------------- /configs/fcos/fcos_X_101_32x8d_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | STRIDE_IN_1X1: False 10 | BACKBONE_OUT_CHANNELS: 256 11 | NUM_GROUPS: 32 12 | WIDTH_PER_GROUP: 8 13 | RETINANET: 14 | USE_C5: False # FCOS uses P5 instead of C5 15 | DATASETS: 16 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 17 | TEST: ("coco_2014_minival",) 18 | INPUT: 19 | MIN_SIZE_RANGE_TRAIN: (640, 800) 20 | MAX_SIZE_TRAIN: 
1333 21 | MIN_SIZE_TEST: 800 22 | MAX_SIZE_TEST: 1333 23 | DATALOADER: 24 | SIZE_DIVISIBILITY: 32 25 | SOLVER: 26 | BASE_LR: 0.01 27 | WEIGHT_DECAY: 0.0001 28 | STEPS: (120000, 160000) 29 | MAX_ITER: 180000 30 | IMS_PER_BATCH: 16 31 | WARMUP_METHOD: "constant" 32 | -------------------------------------------------------------------------------- /configs/fcos/fcos_X_101_64x4d_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-64x4d" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | STRIDE_IN_1X1: False 10 | BACKBONE_OUT_CHANNELS: 256 11 | NUM_GROUPS: 64 12 | WIDTH_PER_GROUP: 4 13 | RETINANET: 14 | USE_C5: False # FCOS uses P5 instead of C5 15 | DATASETS: 16 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 17 | TEST: ("coco_2014_minival",) 18 | INPUT: 19 | MIN_SIZE_RANGE_TRAIN: (640, 800) 20 | MAX_SIZE_TRAIN: 1333 21 | MIN_SIZE_TEST: 800 22 | MAX_SIZE_TEST: 1333 23 | DATALOADER: 24 | SIZE_DIVISIBILITY: 32 25 | SOLVER: 26 | BASE_LR: 0.01 27 | WEIGHT_DECAY: 0.0001 28 | STEPS: (120000, 160000) 29 | MAX_ITER: 180000 30 | IMS_PER_BATCH: 16 31 | WARMUP_METHOD: "constant" 32 | -------------------------------------------------------------------------------- /configs/fcos/fcos_bn_bs16_MNV2_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download#mobilenet_v2-ecbe2b5.pth" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "MNV2-FPN-RETINANET" 8 | FREEZE_CONV_BODY_AT: 0 9 | RESNETS: 10 | BACKBONE_OUT_CHANNELS: 256 11 | RETINANET: 12 | USE_C5: False # FCOS uses P5 instead of C5 13 | USE_SYNCBN: False 14 | DATASETS: 15 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 16 | 
TEST: ("coco_2014_minival",) 17 | INPUT: 18 | MIN_SIZE_TRAIN: (800,) 19 | MAX_SIZE_TRAIN: 1333 20 | MIN_SIZE_TEST: 800 21 | MAX_SIZE_TEST: 1333 22 | DATALOADER: 23 | SIZE_DIVISIBILITY: 32 24 | SOLVER: 25 | BASE_LR: 0.01 26 | WEIGHT_DECAY: 0.0001 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | IMS_PER_BATCH: 16 30 | WARMUP_METHOD: "constant" 31 | -------------------------------------------------------------------------------- /configs/fcos/fcos_imprv_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RETINANET: 11 | USE_C5: False # FCOS uses P5 instead of C5 12 | FCOS: 13 | # normalizing the regression targets with FPN strides 14 | NORM_REG_TARGETS: True 15 | # positioning centerness on the regress branch. 16 | # Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042 17 | CENTERNESS_ON_REG: True 18 | # using center sampling and GIoU. 
19 | # Please refer to https://github.com/yqyao/FCOS_PLUS 20 | CENTER_SAMPLING_RADIUS: 1.5 21 | IOU_LOSS_TYPE: "giou" 22 | DATASETS: 23 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 24 | TEST: ("coco_2014_minival",) 25 | INPUT: 26 | MIN_SIZE_TRAIN: (800,) 27 | MAX_SIZE_TRAIN: 1333 28 | MIN_SIZE_TEST: 800 29 | MAX_SIZE_TEST: 1333 30 | DATALOADER: 31 | SIZE_DIVISIBILITY: 32 32 | SOLVER: 33 | BASE_LR: 0.01 34 | WEIGHT_DECAY: 0.0001 35 | STEPS: (60000, 80000) 36 | MAX_ITER: 90000 37 | IMS_PER_BATCH: 16 38 | WARMUP_METHOD: "constant" 39 | -------------------------------------------------------------------------------- /configs/fcos/fcos_imprv_R_101_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RETINANET: 11 | USE_C5: False # FCOS uses P5 instead of C5 12 | FCOS: 13 | # normalizing the regression targets with FPN strides 14 | NORM_REG_TARGETS: True 15 | # positioning centerness on the regress branch. 16 | # Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042 17 | CENTERNESS_ON_REG: True 18 | # using center sampling and GIoU. 
19 | # Please refer to https://github.com/yqyao/FCOS_PLUS 20 | CENTER_SAMPLING_RADIUS: 1.5 21 | IOU_LOSS_TYPE: "giou" 22 | DATASETS: 23 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 24 | TEST: ("coco_2014_minival",) 25 | INPUT: 26 | MIN_SIZE_RANGE_TRAIN: (640, 800) 27 | MAX_SIZE_TRAIN: 1333 28 | MIN_SIZE_TEST: 800 29 | MAX_SIZE_TEST: 1333 30 | DATALOADER: 31 | SIZE_DIVISIBILITY: 32 32 | SOLVER: 33 | BASE_LR: 0.01 34 | WEIGHT_DECAY: 0.0001 35 | STEPS: (120000, 160000) 36 | MAX_ITER: 180000 37 | IMS_PER_BATCH: 16 38 | WARMUP_METHOD: "constant" -------------------------------------------------------------------------------- /configs/fcos/fcos_imprv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | # FCOS with improvements 2 | MODEL: 3 | META_ARCHITECTURE: "GeneralizedRCNN" 4 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 5 | RPN_ONLY: True 6 | FCOS_ON: True 7 | BACKBONE: 8 | CONV_BODY: "R-50-FPN-RETINANET" 9 | RESNETS: 10 | BACKBONE_OUT_CHANNELS: 256 11 | RETINANET: 12 | USE_C5: False # FCOS uses P5 instead of C5 13 | FCOS: 14 | # normalizing the regression targets with FPN strides 15 | NORM_REG_TARGETS: True 16 | # positioning centerness on the regress branch. 17 | # Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042 18 | CENTERNESS_ON_REG: True 19 | # using center sampling and GIoU. 
20 | # Please refer to https://github.com/yqyao/FCOS_PLUS 21 | CENTER_SAMPLING_RADIUS: 1.5 22 | IOU_LOSS_TYPE: "giou" 23 | DATASETS: 24 | TRAIN: ("coco_2017_train",) 25 | TEST: ("coco_2017_val",) 26 | INPUT: 27 | MIN_SIZE_TRAIN: (800,) 28 | MAX_SIZE_TRAIN: 1333 29 | MIN_SIZE_TEST: 800 30 | MAX_SIZE_TEST: 1333 31 | DATALOADER: 32 | SIZE_DIVISIBILITY: 32 33 | SOLVER: 34 | BASE_LR: 0.01 35 | WEIGHT_DECAY: 0.0001 36 | STEPS: (60000, 80000) 37 | MAX_ITER: 90000 38 | IMS_PER_BATCH: 4 39 | WARMUP_METHOD: "constant" 40 | -------------------------------------------------------------------------------- /configs/fcos/fcos_imprv_X_101_32x8d_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | STRIDE_IN_1X1: False 10 | BACKBONE_OUT_CHANNELS: 256 11 | NUM_GROUPS: 32 12 | WIDTH_PER_GROUP: 8 13 | RETINANET: 14 | USE_C5: False # FCOS uses P5 instead of C5 15 | FCOS: 16 | # normalizing the regression targets with FPN strides 17 | NORM_REG_TARGETS: True 18 | # positioning centerness on the regress branch. 19 | # Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042 20 | CENTERNESS_ON_REG: True 21 | # using center sampling and GIoU. 
22 | # Please refer to https://github.com/yqyao/FCOS_PLUS 23 | CENTER_SAMPLING_RADIUS: 1.5 24 | IOU_LOSS_TYPE: "giou" 25 | DATASETS: 26 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 27 | TEST: ("coco_2014_minival",) 28 | INPUT: 29 | MIN_SIZE_RANGE_TRAIN: (640, 800) 30 | MAX_SIZE_TRAIN: 1333 31 | MIN_SIZE_TEST: 800 32 | MAX_SIZE_TEST: 1333 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | SOLVER: 36 | BASE_LR: 0.01 37 | WEIGHT_DECAY: 0.0001 38 | STEPS: (120000, 160000) 39 | MAX_ITER: 180000 40 | IMS_PER_BATCH: 16 41 | WARMUP_METHOD: "constant" 42 | -------------------------------------------------------------------------------- /configs/fcos/fcos_imprv_X_101_64x4d_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "https://cloudstor.aarnet.edu.au/plus/s/k3ys35075jmU1RP/download#X-101-64x4d.pkl" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | STRIDE_IN_1X1: False 10 | BACKBONE_OUT_CHANNELS: 256 11 | NUM_GROUPS: 64 12 | WIDTH_PER_GROUP: 4 13 | RETINANET: 14 | USE_C5: False # FCOS uses P5 instead of C5 15 | FCOS: 16 | # normalizing the regression targets with FPN strides 17 | NORM_REG_TARGETS: True 18 | # positioning centerness on the regress branch. 19 | # Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042 20 | CENTERNESS_ON_REG: True 21 | # using center sampling and GIoU. 
22 | # Please refer to https://github.com/yqyao/FCOS_PLUS 23 | CENTER_SAMPLING_RADIUS: 1.5 24 | IOU_LOSS_TYPE: "giou" 25 | DATASETS: 26 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 27 | TEST: ("coco_2014_minival",) 28 | INPUT: 29 | MIN_SIZE_RANGE_TRAIN: (640, 800) 30 | MAX_SIZE_TRAIN: 1333 31 | MIN_SIZE_TEST: 800 32 | MAX_SIZE_TEST: 1333 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | SOLVER: 36 | BASE_LR: 0.01 37 | WEIGHT_DECAY: 0.0001 38 | STEPS: (120000, 160000) 39 | MAX_ITER: 180000 40 | IMS_PER_BATCH: 16 41 | WARMUP_METHOD: "constant" 42 | -------------------------------------------------------------------------------- /configs/fcos/fcos_imprv_dcnv2_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | STAGE_WITH_DCN: (False, True, True, True) 11 | WITH_MODULATED_DCN: True 12 | DEFORMABLE_GROUPS: 1 13 | RETINANET: 14 | USE_C5: False # FCOS uses P5 instead of C5 15 | FCOS: 16 | # normalizing the regression targets with FPN strides 17 | NORM_REG_TARGETS: True 18 | # positioning centerness on the regress branch. 19 | # Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042 20 | CENTERNESS_ON_REG: True 21 | # using center sampling and GIoU. 
22 | # Please refer to https://github.com/yqyao/FCOS_PLUS 23 | CENTER_SAMPLING_RADIUS: 1.5 24 | IOU_LOSS_TYPE: "giou" 25 | # we only use dcn in the last layer of towers 26 | USE_DCN_IN_TOWER: True 27 | DATASETS: 28 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 29 | TEST: ("coco_2014_minival",) 30 | INPUT: 31 | MIN_SIZE_TRAIN: (800,) 32 | MAX_SIZE_TRAIN: 1333 33 | MIN_SIZE_TEST: 800 34 | MAX_SIZE_TEST: 1333 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | WARMUP_METHOD: "constant" 44 | 45 | -------------------------------------------------------------------------------- /configs/fcos/fcos_imprv_dcnv2_R_101_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | STAGE_WITH_DCN: (False, True, True, True) 11 | WITH_MODULATED_DCN: True 12 | DEFORMABLE_GROUPS: 1 13 | RETINANET: 14 | USE_C5: False # FCOS uses P5 instead of C5 15 | FCOS: 16 | # normalizing the regression targets with FPN strides 17 | NORM_REG_TARGETS: True 18 | # positioning centerness on the regress branch. 19 | # Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042 20 | CENTERNESS_ON_REG: True 21 | # using center sampling and GIoU. 
22 | # Please refer to https://github.com/yqyao/FCOS_PLUS 23 | CENTER_SAMPLING_RADIUS: 1.5 24 | IOU_LOSS_TYPE: "giou" 25 | # we only use dcn in the last layer of towers 26 | USE_DCN_IN_TOWER: True 27 | DATASETS: 28 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 29 | TEST: ("coco_2014_minival",) 30 | INPUT: 31 | MIN_SIZE_RANGE_TRAIN: (640, 800) 32 | MAX_SIZE_TRAIN: 1333 33 | MIN_SIZE_TEST: 800 34 | MAX_SIZE_TEST: 1333 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.01 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (120000, 160000) 41 | MAX_ITER: 180000 42 | IMS_PER_BATCH: 16 43 | WARMUP_METHOD: "constant" 44 | 45 | -------------------------------------------------------------------------------- /configs/fcos/fcos_imprv_dcnv2_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | # FCOS with improvements 2 | MODEL: 3 | META_ARCHITECTURE: "GeneralizedRCNN" 4 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 5 | RPN_ONLY: True 6 | FCOS_ON: True 7 | BACKBONE: 8 | CONV_BODY: "R-50-FPN-RETINANET" 9 | RESNETS: 10 | BACKBONE_OUT_CHANNELS: 256 11 | STAGE_WITH_DCN: (False, True, True, True) 12 | WITH_MODULATED_DCN: True 13 | DEFORMABLE_GROUPS: 1 14 | RETINANET: 15 | USE_C5: False # FCOS uses P5 instead of C5 16 | FCOS: 17 | # normalizing the regression targets with FPN strides 18 | NORM_REG_TARGETS: True 19 | # positioning centerness on the regress branch. 20 | # Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042 21 | CENTERNESS_ON_REG: True 22 | # using center sampling and GIoU. 
23 | # Please refer to https://github.com/yqyao/FCOS_PLUS 24 | CENTER_SAMPLING_RADIUS: 1.5 25 | IOU_LOSS_TYPE: "giou" 26 | # we only use dcn in the last layer of towers 27 | USE_DCN_IN_TOWER: True 28 | DATASETS: 29 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 30 | TEST: ("coco_2014_minival",) 31 | INPUT: 32 | MIN_SIZE_TRAIN: (800,) 33 | MAX_SIZE_TRAIN: 1333 34 | MIN_SIZE_TEST: 800 35 | MAX_SIZE_TEST: 1333 36 | DATALOADER: 37 | SIZE_DIVISIBILITY: 32 38 | SOLVER: 39 | BASE_LR: 0.01 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 16 44 | WARMUP_METHOD: "constant" 45 | -------------------------------------------------------------------------------- /configs/fcos/fcos_imprv_dcnv2_X_101_32x8d_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | STRIDE_IN_1X1: False 10 | BACKBONE_OUT_CHANNELS: 256 11 | NUM_GROUPS: 32 12 | WIDTH_PER_GROUP: 8 13 | STAGE_WITH_DCN: (False, False, True, True) 14 | WITH_MODULATED_DCN: True 15 | DEFORMABLE_GROUPS: 1 16 | RETINANET: 17 | USE_C5: False # FCOS uses P5 instead of C5 18 | FCOS: 19 | # normalizing the regression targets with FPN strides 20 | NORM_REG_TARGETS: True 21 | # positioning centerness on the regress branch. 22 | # Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042 23 | CENTERNESS_ON_REG: True 24 | # using center sampling and GIoU. 
25 | # Please refer to https://github.com/yqyao/FCOS_PLUS 26 | CENTER_SAMPLING_RADIUS: 1.5 27 | IOU_LOSS_TYPE: "giou" 28 | # we only use dcn in the last layer of towers 29 | USE_DCN_IN_TOWER: True 30 | DATASETS: 31 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 32 | TEST: ("coco_2014_minival",) 33 | INPUT: 34 | MIN_SIZE_RANGE_TRAIN: (640, 800) 35 | MAX_SIZE_TRAIN: 1333 36 | MIN_SIZE_TEST: 800 37 | MAX_SIZE_TEST: 1333 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.01 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (120000, 160000) 44 | MAX_ITER: 180000 45 | IMS_PER_BATCH: 16 46 | WARMUP_METHOD: "constant" 47 | -------------------------------------------------------------------------------- /configs/fcos/fcos_imprv_dcnv2_X_101_64x4d_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "https://cloudstor.aarnet.edu.au/plus/s/k3ys35075jmU1RP/download#X-101-64x4d.pkl" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | STRIDE_IN_1X1: False 10 | BACKBONE_OUT_CHANNELS: 256 11 | NUM_GROUPS: 64 12 | WIDTH_PER_GROUP: 4 13 | STAGE_WITH_DCN: (False, False, True, True) 14 | WITH_MODULATED_DCN: True 15 | DEFORMABLE_GROUPS: 1 16 | RETINANET: 17 | USE_C5: False # FCOS uses P5 instead of C5 18 | FCOS: 19 | # normalizing the regression targets with FPN strides 20 | NORM_REG_TARGETS: True 21 | # positioning centerness on the regress branch. 22 | # Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042 23 | CENTERNESS_ON_REG: True 24 | # using center sampling and GIoU. 
25 | # Please refer to https://github.com/yqyao/FCOS_PLUS 26 | CENTER_SAMPLING_RADIUS: 1.5 27 | IOU_LOSS_TYPE: "giou" 28 | # we only use dcn in the last layer of towers 29 | USE_DCN_IN_TOWER: True 30 | DATASETS: 31 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 32 | TEST: ("coco_2014_minival",) 33 | INPUT: 34 | MIN_SIZE_RANGE_TRAIN: (640, 800) 35 | MAX_SIZE_TRAIN: 1333 36 | MIN_SIZE_TEST: 800 37 | MAX_SIZE_TEST: 1333 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.01 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (120000, 160000) 44 | MAX_ITER: 180000 45 | IMS_PER_BATCH: 16 46 | WARMUP_METHOD: "constant" 47 | TEST: 48 | BBOX_AUG: 49 | ENABLED: False 50 | H_FLIP: True 51 | SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) 52 | MAX_SIZE: 2000 53 | SCALE_H_FLIP: True 54 | -------------------------------------------------------------------------------- /configs/fcos/fcos_syncbn_bs32_MNV2_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download#mobilenet_v2-ecbe2b5.pth" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "MNV2-FPN-RETINANET" 8 | FREEZE_CONV_BODY_AT: 0 9 | RESNETS: 10 | BACKBONE_OUT_CHANNELS: 256 11 | RETINANET: 12 | USE_C5: False # FCOS uses P5 instead of C5 13 | USE_SYNCBN: True 14 | DATASETS: 15 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 16 | TEST: ("coco_2014_minival",) 17 | INPUT: 18 | MIN_SIZE_TRAIN: (800,) 19 | MAX_SIZE_TRAIN: 1333 20 | MIN_SIZE_TEST: 800 21 | MAX_SIZE_TEST: 1333 22 | DATALOADER: 23 | SIZE_DIVISIBILITY: 32 24 | SOLVER: 25 | BASE_LR: 0.01 26 | WEIGHT_DECAY: 0.0001 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | IMS_PER_BATCH: 32 30 | WARMUP_METHOD: "constant" 31 | -------------------------------------------------------------------------------- /configs/fcos/fcos_syncbn_bs32_c128_MNV2_FPN_1x.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download#mobilenet_v2-ecbe2b5.pth" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "MNV2-FPN-RETINANET" 8 | FREEZE_CONV_BODY_AT: 0 9 | RESNETS: 10 | BACKBONE_OUT_CHANNELS: 128 11 | RETINANET: 12 | USE_C5: False # FCOS uses P5 instead of C5 13 | USE_SYNCBN: True 14 | DATASETS: 15 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 16 | TEST: ("coco_2014_minival",) 17 | INPUT: 18 | MIN_SIZE_TRAIN: (800,) 19 | MAX_SIZE_TRAIN: 1333 20 | MIN_SIZE_TEST: 800 21 | MAX_SIZE_TEST: 1333 22 | DATALOADER: 23 | SIZE_DIVISIBILITY: 32 24 | SOLVER: 25 | BASE_LR: 0.01 26 | WEIGHT_DECAY: 0.0001 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | IMS_PER_BATCH: 32 30 | WARMUP_METHOD: "constant" 31 | -------------------------------------------------------------------------------- /configs/fcos/fcos_syncbn_bs32_c128_ms_MNV2_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download#mobilenet_v2-ecbe2b5.pth" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "MNV2-FPN-RETINANET" 8 | FREEZE_CONV_BODY_AT: 0 9 | RESNETS: 10 | BACKBONE_OUT_CHANNELS: 128 11 | RETINANET: 12 | USE_C5: False # FCOS uses P5 instead of C5 13 | USE_SYNCBN: True 14 | DATASETS: 15 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 16 | TEST: ("coco_2014_minival",) 17 | INPUT: 18 | MIN_SIZE_RANGE_TRAIN: (640, 800) 19 | MAX_SIZE_TRAIN: 1333 20 | MIN_SIZE_TEST: 800 21 | MAX_SIZE_TEST: 1333 22 | DATALOADER: 23 | SIZE_DIVISIBILITY: 32 24 | SOLVER: 25 | BASE_LR: 0.01 26 | WEIGHT_DECAY: 0.0001 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | IMS_PER_BATCH: 32 30 | WARMUP_METHOD: "constant" 31 | 
-------------------------------------------------------------------------------- /configs/fcos/fcos_syncbn_bs64_c128_ms_MNV2_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download#mobilenet_v2-ecbe2b5.pth" 4 | RPN_ONLY: True 5 | FCOS_ON: True 6 | BACKBONE: 7 | CONV_BODY: "MNV2-FPN-RETINANET" 8 | FREEZE_CONV_BODY_AT: 0 9 | RESNETS: 10 | BACKBONE_OUT_CHANNELS: 128 11 | RETINANET: 12 | USE_C5: False # FCOS uses P5 instead of C5 13 | USE_SYNCBN: True 14 | DATASETS: 15 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 16 | TEST: ("coco_2014_minival",) 17 | INPUT: 18 | MIN_SIZE_RANGE_TRAIN: (640, 800) 19 | MAX_SIZE_TRAIN: 1333 20 | MIN_SIZE_TEST: 800 21 | MAX_SIZE_TEST: 1333 22 | DATALOADER: 23 | SIZE_DIVISIBILITY: 32 24 | SOLVER: 25 | BASE_LR: 0.01 26 | WEIGHT_DECAY: 0.0001 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | IMS_PER_BATCH: 64 30 | WARMUP_METHOD: "constant" 31 | -------------------------------------------------------------------------------- /configs/gn_baselines/README.md: -------------------------------------------------------------------------------- 1 | ### Group Normalization 2 | 1 [Group Normalization](https://arxiv.org/abs/1803.08494) 3 | 2 [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883) 4 | 3 [official code](https://github.com/facebookresearch/Detectron/blob/master/projects/GN/README.md) 5 | 6 | 7 | ### Performance 8 | | case | Type | lr schd | im/gpu | bbox AP | mask AP | 9 | |----------------------------|:------------:|:---------:|:-------:|:-------:|:-------:| 10 | | R-50-FPN, GN (paper) | finetune | 2x | 2 | 40.3 | 35.7 | 11 | | R-50-FPN, GN (implement) | finetune | 2x | 2 | 40.2 | 36.0 | 12 | | R-50-FPN, GN (paper) | from scratch | 3x | 2 | 39.5 | 35.2 | 13 | | R-50-FPN, GN (implement) | from scratch | 3x | 2 | 38.9 | 35.1 | 14 | 
-------------------------------------------------------------------------------- /configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | DATASETS: 37 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 38 | TEST: ("coco_2014_minival",) 39 | DATALOADER: 40 | SIZE_DIVISIBILITY: 32 41 | SOLVER: 42 | # Assume 8 gpus 43 | BASE_LR: 0.02 44 | WEIGHT_DECAY: 0.0001 45 | STEPS: (60000, 80000) 46 | MAX_ITER: 90000 47 | IMS_PER_BATCH: 16 48 | TEST: 49 | IMS_PER_BATCH: 8 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: 
"catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | DATASETS: 39 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 40 | TEST: ("coco_2014_minival",) 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 8 gpus 45 | BASE_LR: 0.02 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (60000, 80000) 48 | MAX_ITER: 90000 49 | IMS_PER_BATCH: 16 50 | TEST: 51 | IMS_PER_BATCH: 8 52 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 
2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | ROI_MASK_HEAD: 37 | USE_GN: True # use GN for mask head 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | CONV_LAYERS: (256, 256, 256, 256) 40 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 41 | PREDICTOR: "MaskRCNNC4Predictor" 42 | POOLER_RESOLUTION: 14 43 | POOLER_SAMPLING_RATIO: 2 44 | RESOLUTION: 28 45 | SHARE_BOX_FEATURE_EXTRACTOR: False 46 | MASK_ON: True 47 | DATASETS: 48 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 49 | TEST: ("coco_2014_minival",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | # Assume 8 gpus 54 | BASE_LR: 0.02 55 | WEIGHT_DECAY: 0.0001 56 | STEPS: (60000, 80000) 57 | MAX_ITER: 90000 58 | IMS_PER_BATCH: 16 59 | TEST: 60 | IMS_PER_BATCH: 8 61 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | 
POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | ROI_MASK_HEAD: 39 | USE_GN: True # use GN for mask head 40 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 41 | CONV_LAYERS: (256, 256, 256, 256) 42 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 43 | PREDICTOR: "MaskRCNNC4Predictor" 44 | POOLER_RESOLUTION: 14 45 | POOLER_SAMPLING_RATIO: 2 46 | RESOLUTION: 28 47 | SHARE_BOX_FEATURE_EXTRACTOR: False 48 | MASK_ON: True 49 | DATASETS: 50 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 51 | TEST: ("coco_2014_minival",) 52 | DATALOADER: 53 | SIZE_DIVISIBILITY: 32 54 | SOLVER: 55 | # Assume 8 gpus 56 | BASE_LR: 0.02 57 | WEIGHT_DECAY: 0.0001 58 | STEPS: (60000, 80000) 59 | MAX_ITER: 90000 60 | IMS_PER_BATCH: 16 61 | TEST: 62 | IMS_PER_BATCH: 8 63 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 12 | RESNETS: # use GN for backbone 13 | BACKBONE_OUT_CHANNELS: 256 14 | STRIDE_IN_1X1: False 15 | TRANS_FUNC: "BottleneckWithGN" 16 | STEM_FUNC: "StemWithGN" 17 | FPN: 18 | USE_GN: True # use GN for FPN 19 | RPN: 20 | USE_FPN: True 21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 22 | PRE_NMS_TOP_N_TRAIN: 
2000 23 | PRE_NMS_TOP_N_TEST: 1000 24 | POST_NMS_TOP_N_TEST: 1000 25 | FPN_POST_NMS_TOP_N_TEST: 1000 26 | ROI_HEADS: 27 | USE_FPN: True 28 | BATCH_SIZE_PER_IMAGE: 512 29 | POSITIVE_FRACTION: 0.25 30 | ROI_BOX_HEAD: 31 | USE_GN: True # use GN for bbox head 32 | POOLER_RESOLUTION: 7 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | POOLER_SAMPLING_RATIO: 2 35 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 36 | PREDICTOR: "FPNPredictor" 37 | DATASETS: 38 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 39 | TEST: ("coco_2014_minival",) 40 | DATALOADER: 41 | SIZE_DIVISIBILITY: 32 42 | SOLVER: 43 | # Assume 8 gpus 44 | BASE_LR: 0.02 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (210000, 250000) 47 | MAX_ITER: 270000 48 | IMS_PER_BATCH: 16 49 | TEST: 50 | IMS_PER_BATCH: 8 51 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 12 | RESNETS: # use GN for backbone 13 | BACKBONE_OUT_CHANNELS: 256 14 | STRIDE_IN_1X1: False 15 | TRANS_FUNC: "BottleneckWithGN" 16 | STEM_FUNC: "StemWithGN" 17 | FPN: 18 | USE_GN: True # use GN for FPN 19 | RPN: 20 | USE_FPN: True 21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 22 | PRE_NMS_TOP_N_TRAIN: 2000 23 | PRE_NMS_TOP_N_TEST: 1000 24 | POST_NMS_TOP_N_TEST: 1000 25 | FPN_POST_NMS_TOP_N_TEST: 1000 26 | ROI_HEADS: 27 | USE_FPN: True 28 | BATCH_SIZE_PER_IMAGE: 512 29 | POSITIVE_FRACTION: 0.25 30 | ROI_BOX_HEAD: 31 | USE_GN: True # use GN for bbox head 32 | POOLER_RESOLUTION: 7 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | POOLER_SAMPLING_RATIO: 2 35 | CONV_HEAD_DIM: 
256 36 | NUM_STACKED_CONVS: 4 37 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 38 | PREDICTOR: "FPNPredictor" 39 | DATASETS: 40 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 41 | TEST: ("coco_2014_minival",) 42 | DATALOADER: 43 | SIZE_DIVISIBILITY: 32 44 | SOLVER: 45 | # Assume 8 gpus 46 | BASE_LR: 0.02 47 | WEIGHT_DECAY: 0.0001 48 | STEPS: (210000, 250000) 49 | MAX_ITER: 270000 50 | IMS_PER_BATCH: 16 51 | TEST: 52 | IMS_PER_BATCH: 8 53 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 12 | RESNETS: # use GN for backbone 13 | BACKBONE_OUT_CHANNELS: 256 14 | STRIDE_IN_1X1: False 15 | TRANS_FUNC: "BottleneckWithGN" 16 | STEM_FUNC: "StemWithGN" 17 | FPN: 18 | USE_GN: True # use GN for FPN 19 | RPN: 20 | USE_FPN: True 21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 22 | PRE_NMS_TOP_N_TRAIN: 2000 23 | PRE_NMS_TOP_N_TEST: 1000 24 | POST_NMS_TOP_N_TEST: 1000 25 | FPN_POST_NMS_TOP_N_TEST: 1000 26 | ROI_HEADS: 27 | USE_FPN: True 28 | BATCH_SIZE_PER_IMAGE: 512 29 | POSITIVE_FRACTION: 0.25 30 | ROI_BOX_HEAD: 31 | USE_GN: True # use GN for bbox head 32 | POOLER_RESOLUTION: 7 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | POOLER_SAMPLING_RATIO: 2 35 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 36 | PREDICTOR: "FPNPredictor" 37 | ROI_MASK_HEAD: 38 | USE_GN: True # use GN for mask head 39 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 40 | CONV_LAYERS: (256, 256, 256, 256) 41 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 42 | PREDICTOR: "MaskRCNNC4Predictor" 43 | POOLER_RESOLUTION: 14 44 | 
POOLER_SAMPLING_RATIO: 2 45 | RESOLUTION: 28 46 | SHARE_BOX_FEATURE_EXTRACTOR: False 47 | MASK_ON: True 48 | DATASETS: 49 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 50 | TEST: ("coco_2014_minival",) 51 | DATALOADER: 52 | SIZE_DIVISIBILITY: 32 53 | SOLVER: 54 | # Assume 8 gpus 55 | BASE_LR: 0.02 56 | WEIGHT_DECAY: 0.0001 57 | STEPS: (210000, 250000) 58 | MAX_ITER: 270000 59 | IMS_PER_BATCH: 16 60 | TEST: 61 | IMS_PER_BATCH: 8 62 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | FREEZE_CONV_BODY_AT: 0 # finetune all layers 12 | RESNETS: # use GN for backbone 13 | BACKBONE_OUT_CHANNELS: 256 14 | STRIDE_IN_1X1: False 15 | TRANS_FUNC: "BottleneckWithGN" 16 | STEM_FUNC: "StemWithGN" 17 | FPN: 18 | USE_GN: True # use GN for FPN 19 | RPN: 20 | USE_FPN: True 21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 22 | PRE_NMS_TOP_N_TRAIN: 2000 23 | PRE_NMS_TOP_N_TEST: 1000 24 | POST_NMS_TOP_N_TEST: 1000 25 | FPN_POST_NMS_TOP_N_TEST: 1000 26 | ROI_HEADS: 27 | USE_FPN: True 28 | BATCH_SIZE_PER_IMAGE: 512 29 | POSITIVE_FRACTION: 0.25 30 | ROI_BOX_HEAD: 31 | USE_GN: True # use GN for bbox head 32 | POOLER_RESOLUTION: 7 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | POOLER_SAMPLING_RATIO: 2 35 | CONV_HEAD_DIM: 256 36 | NUM_STACKED_CONVS: 4 37 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 38 | PREDICTOR: "FPNPredictor" 39 | ROI_MASK_HEAD: 40 | USE_GN: True # use GN for mask head 41 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 42 | CONV_LAYERS: (256, 256, 256, 256) 43 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 44 | PREDICTOR: 
"MaskRCNNC4Predictor" 45 | POOLER_RESOLUTION: 14 46 | POOLER_SAMPLING_RATIO: 2 47 | RESOLUTION: 28 48 | SHARE_BOX_FEATURE_EXTRACTOR: False 49 | MASK_ON: True 50 | DATASETS: 51 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 52 | TEST: ("coco_2014_minival",) 53 | DATALOADER: 54 | SIZE_DIVISIBILITY: 32 55 | SOLVER: 56 | # Assume 8 gpus 57 | BASE_LR: 0.02 58 | WEIGHT_DECAY: 0.0001 59 | STEPS: (210000, 250000) 60 | MAX_ITER: 270000 61 | IMS_PER_BATCH: 16 62 | TEST: 63 | IMS_PER_BATCH: 8 64 | -------------------------------------------------------------------------------- /configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 300 7 | ANCHOR_SIZES: (128, 256, 512) 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 21 10 | DATASETS: 11 | TRAIN: ("voc_2007_train", "voc_2007_val") 12 | TEST: ("voc_2007_test",) 13 | SOLVER: 14 | BASE_LR: 0.001 15 | WEIGHT_DECAY: 0.0001 16 | STEPS: (50000, ) 17 | MAX_ITER: 70000 18 | IMS_PER_BATCH: 1 19 | TEST: 20 | IMS_PER_BATCH: 1 21 | -------------------------------------------------------------------------------- /configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 300 7 | ANCHOR_SIZES: (128, 256, 512) 8 | ROI_BOX_HEAD: 9 | NUM_CLASSES: 21 10 | DATASETS: 11 | TRAIN: ("voc_2007_train", "voc_2007_val") 12 | TEST: ("voc_2007_test",) 13 | SOLVER: 14 | BASE_LR: 0.004 15 | WEIGHT_DECAY: 0.0001 16 | STEPS: (12500, ) 17 | MAX_ITER: 17500 18 | IMS_PER_BATCH: 4 19 | TEST: 20 | IMS_PER_BATCH: 4 21 | 
-------------------------------------------------------------------------------- /configs/pascal_voc/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 21 24 | ROI_MASK_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 27 | PREDICTOR: "MaskRCNNC4Predictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 28 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | MASK_ON: True 33 | DATASETS: 34 | TRAIN: ("voc_2012_train_cocostyle",) 35 | TEST: ("voc_2012_val_cocostyle",) 36 | DATALOADER: 37 | SIZE_DIVISIBILITY: 32 38 | SOLVER: 39 | BASE_LR: 0.01 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (18000,) 42 | MAX_ITER: 24000 43 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | DATASETS: 10 | TRAIN: ("coco_2014_minival",) 11 | TEST: ("coco_2014_minival",) 12 | INPUT: 13 | MIN_SIZE_TRAIN: (600,) 14 | MAX_SIZE_TRAIN: 
1000 15 | MIN_SIZE_TEST: 800 16 | MAX_SIZE_TEST: 1000 17 | SOLVER: 18 | BASE_LR: 0.005 19 | WEIGHT_DECAY: 0.0001 20 | STEPS: (1500,) 21 | MAX_ITER: 2000 22 | IMS_PER_BATCH: 2 23 | TEST: 24 | IMS_PER_BATCH: 2 25 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | BATCH_SIZE_PER_IMAGE: 256 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | DATASETS: 25 | TRAIN: ("coco_2014_minival",) 26 | TEST: ("coco_2014_minival",) 27 | INPUT: 28 | MIN_SIZE_TRAIN: (600,) 29 | MAX_SIZE_TRAIN: 1000 30 | MIN_SIZE_TEST: 800 31 | MAX_SIZE_TEST: 1000 32 | DATALOADER: 33 | SIZE_DIVISIBILITY: 32 34 | SOLVER: 35 | BASE_LR: 0.005 36 | WEIGHT_DECAY: 0.0001 37 | STEPS: (1500,) 38 | MAX_ITER: 2000 39 | IMS_PER_BATCH: 4 40 | TEST: 41 | IMS_PER_BATCH: 2 42 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 
11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | BATCH_SIZE_PER_IMAGE: 256 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: "FPNPredictor" 27 | DATASETS: 28 | TRAIN: ("coco_2014_minival",) 29 | TEST: ("coco_2014_minival",) 30 | INPUT: 31 | MIN_SIZE_TRAIN: (600,) 32 | MAX_SIZE_TRAIN: 1000 33 | MIN_SIZE_TEST: 800 34 | MAX_SIZE_TEST: 1000 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.005 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (1500,) 41 | MAX_ITER: 2000 42 | IMS_PER_BATCH: 2 43 | TEST: 44 | IMS_PER_BATCH: 2 45 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_keypoint_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | BATCH_SIZE_PER_IMAGE: 256 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | NUM_CLASSES: 2 25 | ROI_KEYPOINT_HEAD: 26 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 27 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor" 28 | PREDICTOR: "KeypointRCNNPredictor" 29 | POOLER_RESOLUTION: 14 30 | POOLER_SAMPLING_RATIO: 2 
31 | RESOLUTION: 56 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | KEYPOINT_ON: True 34 | DATASETS: 35 | TRAIN: ("keypoints_coco_2014_minival",) 36 | TEST: ("keypoints_coco_2014_minival",) 37 | INPUT: 38 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 39 | MAX_SIZE_TRAIN: 1000 40 | MIN_SIZE_TEST: 800 41 | MAX_SIZE_TEST: 1000 42 | DATALOADER: 43 | SIZE_DIVISIBILITY: 32 44 | SOLVER: 45 | BASE_LR: 0.005 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (1500,) 48 | MAX_ITER: 2000 49 | IMS_PER_BATCH: 4 50 | TEST: 51 | IMS_PER_BATCH: 2 52 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | ROI_MASK_HEAD: 10 | PREDICTOR: "MaskRCNNC4Predictor" 11 | SHARE_BOX_FEATURE_EXTRACTOR: True 12 | MASK_ON: True 13 | DATASETS: 14 | TRAIN: ("coco_2014_minival",) 15 | TEST: ("coco_2014_minival",) 16 | INPUT: 17 | MIN_SIZE_TRAIN: (600,) 18 | MAX_SIZE_TRAIN: 1000 19 | MIN_SIZE_TEST: 800 20 | MAX_SIZE_TEST: 1000 21 | SOLVER: 22 | BASE_LR: 0.005 23 | WEIGHT_DECAY: 0.0001 24 | STEPS: (1500,) 25 | MAX_ITER: 2000 26 | IMS_PER_BATCH: 4 27 | TEST: 28 | IMS_PER_BATCH: 2 29 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | 
POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | BATCH_SIZE_PER_IMAGE: 256 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | ROI_MASK_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 27 | PREDICTOR: "MaskRCNNC4Predictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 28 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | MASK_ON: True 33 | DATASETS: 34 | TRAIN: ("coco_2014_minival",) 35 | TEST: ("coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (600,) 38 | MAX_SIZE_TRAIN: 1000 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1000 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | BASE_LR: 0.005 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (1500,) 47 | MAX_ITER: 2000 48 | IMS_PER_BATCH: 4 49 | TEST: 50 | IMS_PER_BATCH: 2 51 | -------------------------------------------------------------------------------- /configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | BATCH_SIZE_PER_IMAGE: 256 21 | ROI_BOX_HEAD: 22 | POOLER_RESOLUTION: 7 23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 24 | POOLER_SAMPLING_RATIO: 2 25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 26 | PREDICTOR: 
"FPNPredictor" 27 | ROI_MASK_HEAD: 28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 30 | PREDICTOR: "MaskRCNNC4Predictor" 31 | POOLER_RESOLUTION: 14 32 | POOLER_SAMPLING_RATIO: 2 33 | RESOLUTION: 28 34 | SHARE_BOX_FEATURE_EXTRACTOR: False 35 | MASK_ON: True 36 | DATASETS: 37 | TRAIN: ("coco_2014_minival",) 38 | TEST: ("coco_2014_minival",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (600,) 41 | MAX_SIZE_TRAIN: 1000 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1000 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | BASE_LR: 0.005 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (1500,) 50 | MAX_ITER: 2000 51 | IMS_PER_BATCH: 2 52 | TEST: 53 | IMS_PER_BATCH: 2 54 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_C4_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 12000 7 | POST_NMS_TOP_N_TEST: 2000 8 | DATASETS: 9 | TRAIN: ("coco_2014_minival",) 10 | TEST: ("coco_2014_minival",) 11 | INPUT: 12 | MIN_SIZE_TRAIN: (600,) 13 | MAX_SIZE_TRAIN: 1000 14 | MIN_SIZE_TEST: 800 15 | MAX_SIZE_TEST: 1000 16 | SOLVER: 17 | BASE_LR: 0.005 18 | WEIGHT_DECAY: 0.0001 19 | STEPS: (1500,) 20 | MAX_ITER: 2000 21 | IMS_PER_BATCH: 4 22 | TEST: 23 | IMS_PER_BATCH: 2 24 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TEST: 1000 13 | 
POST_NMS_TOP_N_TEST: 2000 14 | FPN_POST_NMS_TOP_N_TEST: 2000 15 | DATASETS: 16 | TRAIN: ("coco_2014_minival",) 17 | TEST: ("coco_2014_minival",) 18 | INPUT: 19 | MIN_SIZE_TRAIN: (600,) 20 | MAX_SIZE_TRAIN: 1000 21 | MIN_SIZE_TEST: 800 22 | MAX_SIZE_TEST: 1000 23 | DATALOADER: 24 | SIZE_DIVISIBILITY: 32 25 | SOLVER: 26 | BASE_LR: 0.005 27 | WEIGHT_DECAY: 0.0001 28 | STEPS: (1500,) 29 | MAX_ITER: 2000 30 | IMS_PER_BATCH: 4 31 | TEST: 32 | IMS_PER_BATCH: 2 33 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-101-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | FG_IOU_THRESHOLD: 0.5 32 | BG_IOU_THRESHOLD: 0.4 33 | DATASETS: 34 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 35 | TEST: ("coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (800, ) 38 | MAX_SIZE_TRAIN: 1333 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1333 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 4 gpus 45 | BASE_LR: 0.005 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (120000, 160000) 48 | MAX_ITER: 180000 49 | IMS_PER_BATCH: 8 50 | 
-------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-101-FPN_P5_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | USE_C5: False 32 | FG_IOU_THRESHOLD: 0.5 33 | BG_IOU_THRESHOLD: 0.4 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | INPUT: 38 | MIN_SIZE_TRAIN: (800, ) 39 | MAX_SIZE_TRAIN: 1333 40 | MIN_SIZE_TEST: 800 41 | MAX_SIZE_TEST: 1333 42 | DATALOADER: 43 | SIZE_DIVISIBILITY: 32 44 | SOLVER: 45 | # Assume 4 gpus 46 | BASE_LR: 0.005 47 | WEIGHT_DECAY: 0.0001 48 | STEPS: (120000, 160000) 49 | MAX_ITER: 180000 50 | IMS_PER_BATCH: 8 51 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-50-FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 
| RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | FG_IOU_THRESHOLD: 0.5 32 | BG_IOU_THRESHOLD: 0.4 33 | DATASETS: 34 | TRAIN: ("coco_2017_train", ) 35 | TEST: ("coco_2017_val",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (800,) 38 | MAX_SIZE_TRAIN: 1333 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1333 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 4 gpus 45 | BASE_LR: 0.005 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (120000, 160000) 48 | MAX_ITER: 180000 49 | IMS_PER_BATCH: 8 50 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-50-FPN_1x_quick.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: 
"FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | FG_IOU_THRESHOLD: 0.5 32 | BG_IOU_THRESHOLD: 0.4 33 | DATASETS: 34 | TRAIN: ("coco_2014_minival",) 35 | TEST: ("coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (600,) 38 | MAX_SIZE_TRAIN: 1000 39 | MIN_SIZE_TEST: 800 40 | MAX_SIZE_TEST: 1000 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | BASE_LR: 0.005 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (3500,) 47 | MAX_ITER: 4000 48 | IMS_PER_BATCH: 4 49 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_R-50-FPN_P5_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-50-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | RPN: 11 | USE_FPN: True 12 | FG_IOU_THRESHOLD: 0.5 13 | BG_IOU_THRESHOLD: 0.4 14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 15 | PRE_NMS_TOP_N_TRAIN: 2000 16 | PRE_NMS_TOP_N_TEST: 1000 17 | POST_NMS_TOP_N_TEST: 1000 18 | FPN_POST_NMS_TOP_N_TEST: 1000 19 | ROI_HEADS: 20 | USE_FPN: True 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 7 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | POOLER_SAMPLING_RATIO: 2 26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 27 | PREDICTOR: "FPNPredictor" 28 | RETINANET: 29 | SCALES_PER_OCTAVE: 3 30 | STRADDLE_THRESH: -1 31 | USE_C5: False 32 | FG_IOU_THRESHOLD: 0.5 33 | BG_IOU_THRESHOLD: 0.4 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | INPUT: 38 | MIN_SIZE_TRAIN: (800,) 39 | MAX_SIZE_TRAIN: 1333 40 | MIN_SIZE_TEST: 800 41 | MAX_SIZE_TEST: 1333 42 | DATALOADER: 43 | SIZE_DIVISIBILITY: 32 44 | SOLVER: 45 | # Assume 4 gpus 46 | BASE_LR: 0.005 47 | WEIGHT_DECAY: 0.0001 48 | STEPS: (120000, 160000) 
49 | MAX_ITER: 180000 50 | IMS_PER_BATCH: 8 51 | -------------------------------------------------------------------------------- /configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | RPN_ONLY: True 5 | RETINANET_ON: True 6 | BACKBONE: 7 | CONV_BODY: "R-101-FPN-RETINANET" 8 | RESNETS: 9 | BACKBONE_OUT_CHANNELS: 256 10 | STRIDE_IN_1X1: False 11 | NUM_GROUPS: 32 12 | WIDTH_PER_GROUP: 8 13 | RPN: 14 | USE_FPN: True 15 | FG_IOU_THRESHOLD: 0.5 16 | BG_IOU_THRESHOLD: 0.4 17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 18 | PRE_NMS_TOP_N_TRAIN: 2000 19 | PRE_NMS_TOP_N_TEST: 1000 20 | POST_NMS_TOP_N_TEST: 1000 21 | FPN_POST_NMS_TOP_N_TEST: 1000 22 | ROI_HEADS: 23 | USE_FPN: True 24 | BATCH_SIZE_PER_IMAGE: 256 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | RETINANET: 32 | SCALES_PER_OCTAVE: 3 33 | STRADDLE_THRESH: -1 34 | FG_IOU_THRESHOLD: 0.5 35 | BG_IOU_THRESHOLD: 0.4 36 | DATASETS: 37 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 38 | TEST: ("coco_2014_minival",) 39 | INPUT: 40 | MIN_SIZE_TRAIN: (800, ) 41 | MAX_SIZE_TRAIN: 1333 42 | MIN_SIZE_TEST: 800 43 | MAX_SIZE_TEST: 1333 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | # Assume 4 gpus 48 | BASE_LR: 0.0025 49 | WEIGHT_DECAY: 0.0001 50 | STEPS: (240000, 320000) 51 | MAX_ITER: 360000 52 | IMS_PER_BATCH: 4 53 | -------------------------------------------------------------------------------- /configs/rpn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | RPN_ONLY: True 5 | BACKBONE: 6 
| CONV_BODY: "R-101-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 2000 14 | FPN_POST_NMS_TOP_N_TEST: 2000 15 | DATASETS: 16 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 17 | TEST: ("coco_2014_minival",) 18 | DATALOADER: 19 | SIZE_DIVISIBILITY: 32 20 | SOLVER: 21 | BASE_LR: 0.02 22 | WEIGHT_DECAY: 0.0001 23 | STEPS: (60000, 80000) 24 | MAX_ITER: 90000 25 | -------------------------------------------------------------------------------- /configs/rpn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | RPN: 6 | PRE_NMS_TOP_N_TEST: 12000 7 | POST_NMS_TOP_N_TEST: 2000 8 | DATASETS: 9 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 10 | TEST: ("coco_2014_minival",) 11 | SOLVER: 12 | BASE_LR: 0.02 13 | WEIGHT_DECAY: 0.0001 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | -------------------------------------------------------------------------------- /configs/rpn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN_ONLY: True 5 | BACKBONE: 6 | CONV_BODY: "R-50-FPN" 7 | RESNETS: 8 | BACKBONE_OUT_CHANNELS: 256 9 | RPN: 10 | USE_FPN: True 11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 2000 14 | FPN_POST_NMS_TOP_N_TEST: 2000 15 | DATASETS: 16 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 17 | TEST: ("coco_2014_minival",) 18 | DATALOADER: 19 | SIZE_DIVISIBILITY: 32 20 | SOLVER: 21 | BASE_LR: 0.02 22 | WEIGHT_DECAY: 0.0001 23 | STEPS: (60000, 80000) 24 | MAX_ITER: 90000 25 | 
class AugmentCell(nn.Module):
    """Discrete cell used by the augmented (final) network.

    Unlike a search cell, every edge here carries one fixed operation that
    is taken from the supplied genotype.
    """

    def __init__(self, genotype, C_pp, C_p, C, reduction_p, reduction, bottleNeck, norm=True, relu=True):
        """Create the optional input adapters and the operation DAG.

        Args:
            genotype: object exposing ``normal`` / ``normal_concat`` and
                ``reduce`` / ``reduce_concat``.
            C_pp: channel count expected for the first input (``s0``).
            C_p: channel count expected for the second input (``s1``).
            C: base channel count; the node width is ``int(C * bottleNeck)``.
            reduction_p: accepted for signature compatibility; unused here.
            reduction: when truthy the ``reduce`` gene is used, otherwise
                the ``normal`` gene.
            bottleNeck: multiplier applied to ``C`` to get the node width.
            norm: forwarded to the input adapters and to ``gt.to_dag``.
            relu: forwarded to ``gt.to_dag`` only; the input adapters are
                always built with ``relu=True``.
        """
        super().__init__()
        self.reduction = reduction
        self.n_nodes = len(genotype.normal)
        self.bottleNeck = bottleNeck
        self.C_node = int(C * bottleNeck)

        # An adapter is only created for an input whose channel count differs
        # from the node width (StdConv args 1, 1, 0 are presumably
        # kernel / stride / padding -- confirm against ops.StdConv).
        for attr_name, in_channels in (("preproc0", C_pp), ("preproc1", C_p)):
            if in_channels != self.C_node:
                adapter = ops.StdConv(in_channels, self.C_node, 1, 1, 0, norm=norm, relu=True)
                setattr(self, attr_name, adapter)

        # Pick the gene and the list of node indices to concatenate.
        if reduction:
            gene, self.concat = genotype.reduce, genotype.reduce_concat
        else:
            gene, self.concat = genotype.normal, genotype.normal_concat

        self.dag = gt.to_dag(C, gene, reduction, bottleNeck, norm=norm, relu=relu)

    def forward(self, s0, s1):
        """Run the cell on two input states and return the concatenated nodes.

        Each input is passed through its adapter only when its channel
        dimension (``shape[1]``) differs from ``self.C_node``.  Every DAG
        node is the sum of its incoming ops, each applied to the state
        selected by ``op.s_idx``.  The states listed in ``self.concat`` are
        concatenated along dimension 1.
        """
        node_width = self.C_node
        if s0.shape[1] != node_width:
            s0 = self.preproc0(s0)
        if s1.shape[1] != node_width:
            s1 = self.preproc1(s1)

        states = [s0, s1]
        for incoming in self.dag:
            # Start from 0 so the accumulation matches the built-in sum().
            node = 0
            for op in incoming:
                node = node + op(states[op.s_idx])
            states.append(node)

        selected = [states[idx] for idx in self.concat]
        return torch.cat(selected, dim=1)
class AugmentRCNN(nn.Module):
    """Stack of ``AugmentCell`` modules built from a genotype string."""

    def __init__(self, C_in, C, n_layers, genotype, norm=True, C_node=None):
        """Parse the genotype and build ``n_layers`` chained cells.

        Args:
            C_in: number of input channels fed to the first cell.
            C: base channel count handed to every cell.
            n_layers: number of cells to stack.
            genotype: genotype description parsed with ``gt.from_str``.
            norm: forwarded to every ``AugmentCell``.
            C_node: optional node width; when falsy the bottleneck ratio is
                ``C_in / C``, otherwise ``C_node / C``.
        """
        super().__init__()

        ratio = (C_node / C) if C_node else (C_in / C)

        self.C_in = C_in
        self.C = C
        self.n_layers = n_layers
        self.genotype = gt.from_str(genotype)
        parsed = self.genotype

        self.cells = nn.ModuleList()

        # Both inputs of the first cell carry C_in channels; afterwards the
        # second input is the previous cell's concatenated output.
        ch_prev_prev, ch_prev = C_in, C_in
        for _ in range(n_layers):
            cell = AugmentCell(parsed, ch_prev_prev, ch_prev, C, False, False, ratio, norm=norm, relu=True)
            self.cells.append(cell)
            ch_out = int(C * len(cell.concat) * ratio)
            ch_prev_prev, ch_prev = ch_prev, ch_out

    def forward(self, x):
        """Feed ``x`` through all cells and return the last cell's output.

        ``x`` may be a list, in which case its first two items become the
        two input states, or any other object, which is then used for both
        states.  A state that is itself a list is replaced by its first
        element.
        """
        if isinstance(x, list):
            older, newer = x[0], x[1]
        else:
            older, newer = x, x

        if isinstance(older, list):
            older = older[0]
        if isinstance(newer, list):
            newer = newer[0]

        for cell in self.cells:
            older, newer = newer, cell(older, newer)

        return newer

    def drop_path_prob(self, p):
        """Assign ``p`` to the ``p`` attribute of every ``ops.DropPath_`` submodule."""
        for sub in self.modules():
            if not isinstance(sub, ops.DropPath_):
                continue
            sub.p = p
def plot(genotype, file_path, caption=None):
    """Render a cell genotype as a left-to-right DAG and save it as PNG.

    Args:
        genotype: sequence with one entry per intermediate node; each entry
            is an iterable of ``(op, j)`` pairs where ``j`` is the index of
            the source state (0 and 1 are the two cell inputs, ``j >= 2``
            refers to intermediate node ``j - 2``).
        file_path: output path handed to ``Digraph.render``.
        caption: optional text attached to the graph as its label.
    """
    input_names = {0: "c_{k-2}", 1: "c_{k-1}"}
    output_name = "c_{k}"

    edge_style = {
        'fontsize': '20',
        'fontname': 'times'
    }
    node_style = {
        'style': 'filled',
        'shape': 'rect',
        'align': 'center',
        'fontsize': '20',
        'height': '0.5',
        'width': '0.5',
        'penwidth': '2',
        'fontname': 'times'
    }
    g = Digraph(
        format='png',
        edge_attr=edge_style,
        node_attr=node_style,
        engine='dot')
    g.body.extend(['rankdir=LR'])

    # The two cell inputs.
    for name in (input_names[0], input_names[1]):
        g.node(name, fillcolor='darkseagreen2')

    # One node per genotype entry.
    n_nodes = len(genotype)
    for idx in range(n_nodes):
        g.node(str(idx), fillcolor='lightblue')

    # Edges from the source states into each intermediate node.
    for idx, edges in enumerate(genotype):
        target = str(idx)
        for op, j in edges:
            source = input_names[j] if j in input_names else str(j - 2)
            g.edge(source, target, label=op, fillcolor="gray")

    # Every intermediate node feeds the single output node.
    g.node(output_name, fillcolor='palegoldenrod')
    for idx in range(n_nodes):
        g.edge(str(idx), output_name, fillcolor="gray")

    if caption:
        g.attr(label=caption, overlap='false', fontsize='20', fontname='times')

    g.render(file_path, view=False)


if __name__ == '__main__':
    # Command-line entry: expects exactly one argument, the genotype string.
    argv = sys.argv
    if len(argv) != 2:
        raise ValueError("usage:\n python {} GENOTYPE".format(argv[0]))

    genotype_str = argv[1]
    try:
        genotype = gt.from_str(genotype_str)
    except AttributeError:
        raise ValueError("Cannot parse {}".format(genotype_str))

    plot(genotype.normal, "normal")
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /fcos_core/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /fcos_core/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU 
support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /fcos_core/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = 
y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /fcos_core/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /fcos_core/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | const int sample_per_part, 56 | 
const float trans_std) 57 | { 58 | if (input.type().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | -------------------------------------------------------------------------------- /fcos_core/csrc/ml_nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor ml_nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const at::Tensor& labels, 13 | const float threshold) { 14 | 15 | if (dets.type().is_cuda()) { 16 | #ifdef WITH_CUDA 17 | // TODO raise error if not compiled with CUDA 18 | if (dets.numel() == 0) 19 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 20 | auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1); 21 | return ml_nms_cuda(b, threshold); 22 | #else 23 | AT_ERROR("Not compiled with GPU support"); 24 | #endif 25 | } 26 | AT_ERROR("CPU version not implemented"); 27 | } 28 | -------------------------------------------------------------------------------- /fcos_core/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /fcos_core/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "nms.h" 3 | #include "ml_nms.h" 4 | #include "ROIAlign.h" 5 | #include "ROIPool.h" 6 | #include "SigmoidFocalLoss.h" 7 | #include "deform_conv.h" 8 | #include "deform_pool.h" 9 | 10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 11 | m.def("nms", &nms, "non-maximum suppression"); 12 | m.def("ml_nms", &ml_nms, "multi-label non-maximum suppression"); 13 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 14 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 15 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 16 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 17 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 18 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 19 | // dcn-v2 20 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 21 | m.def("deform_conv_backward_input", &deform_conv_backward_input, 
"deform_conv_backward_input"); 22 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 23 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 24 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 25 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 26 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 27 | } 28 | -------------------------------------------------------------------------------- /fcos_core/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_data_loader 3 | -------------------------------------------------------------------------------- /fcos_core/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from fcos_core.structures.image_list import to_image_list 3 | 4 | 5 | class BatchCollator(object): 6 | """ 7 | From a list of samples from the dataset, 8 | returns the batched images and targets. 9 | This should be passed to the DataLoader 10 | """ 11 | 12 | def __init__(self, size_divisible=0): 13 | self.size_divisible = size_divisible 14 | 15 | def __call__(self, batch): 16 | transposed_batch = list(zip(*batch)) 17 | images = to_image_list(transposed_batch[0], self.size_divisible) 18 | targets = transposed_batch[1] 19 | img_ids = transposed_batch[2] 20 | return images, targets, img_ids 21 | 22 | 23 | class BBoxAugCollator(object): 24 | """ 25 | From a list of samples from the dataset, 26 | returns the images and targets. 
class ConcatDataset(_ConcatDataset):
    """
    torch.utils.data.dataset.ConcatDataset with two extra helpers: one that
    maps a global index to (dataset index, index inside that dataset), and
    one that forwards get_img_info to the dataset owning the index.
    """

    def get_idxs(self, idx):
        # cumulative_sizes[k] is the number of samples in datasets[0..k], so
        # bisect_right gives the position of the dataset that holds idx.
        which = bisect.bisect_right(self.cumulative_sizes, idx)
        local = idx if which == 0 else idx - self.cumulative_sizes[which - 1]
        return which, local

    def get_img_info(self, idx):
        which, local = self.get_idxs(idx)
        owner = self.datasets[which]
        return owner.get_img_info(local)
def coco_evaluation(
    dataset,
    predictions,
    output_folder,
    box_only,
    iou_types,
    expected_results,
    expected_results_sigma_tol,
):
    """Run COCO evaluation by delegating to ``do_coco_evaluation``.

    Every argument is forwarded by keyword under the same name, and the
    delegate's return value is returned unchanged.
    """
    forwarded = dict(
        dataset=dataset,
        predictions=predictions,
        box_only=box_only,
        output_folder=output_folder,
        iou_types=iou_types,
        expected_results=expected_results,
        expected_results_sigma_tol=expected_results_sigma_tol,
    )
    return do_coco_evaluation(**forwarded)
class ListDataset(object):
    """
    Dataset over a list of image paths.

    Each item is the image at the given position, converted to RGB, paired
    with a dummy target consisting of one box that covers the whole image.
    """

    def __init__(self, image_lists, transforms=None):
        # image_lists: sequence of paths accepted by PIL.Image.open.
        # transforms: optional callable applied as transforms(img, target).
        self.image_lists = image_lists
        self.transforms = transforms

    def __getitem__(self, item):
        # convert() returns an independent image, so the file handle can be
        # released as soon as the conversion is done.
        with Image.open(self.image_lists[item]) as raw:
            img = raw.convert("RGB")

        # dummy target
        w, h = img.size
        target = BoxList([[0, 0, w, h]], img.size, mode="xyxy")

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.image_lists)

    def get_img_info(self, item):
        """
        Return the image dimensions for the image, without
        loading and pre-processing it

        Previously this was a stub that returned None. It now opens the file
        (PIL only parses the header at this point), reads the size and closes
        the file again.

        Returns:
            dict with integer "height" and "width" entries.
            NOTE(review): the key names are assumed to match what the other
            datasets' get_img_info consumers read -- confirm against callers.
        """
        with Image.open(self.image_lists[item]) as img:
            width, height = img.size
        return {"height": height, "width": width}
class DistributedSampler(Sampler):
    """Sampler that restricts data loading to a subset of the dataset.

    Each of ``num_replicas`` processes gets a disjoint, equally sized slice of
    the (optionally shuffled) index list, so that the processes together
    cover the dataset. The index list is padded by repeating indices from its
    start until its length is a multiple of ``num_replicas``.

    .. note::
        Dataset is assumed to be of constant size.

    Arguments:
        dataset: Dataset used for sampling; only ``len(dataset)`` is used.
        num_replicas (optional): Number of processes participating in
            distributed training. Defaults to ``dist.get_world_size()``.
        rank (optional): Rank of the current process within num_replicas.
            Defaults to ``dist.get_rank()``.
        shuffle (optional): If True, indices are permuted with a generator
            seeded by the current epoch (see ``set_epoch``); otherwise they
            are used in natural order.

    Raises:
        RuntimeError: if ``num_replicas`` or ``rank`` has to be looked up and
            the distributed package is not available.
    """

    def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
        if num_replicas is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            num_replicas = dist.get_world_size()
        if rank is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            rank = dist.get_rank()
        self.dataset = dataset
        self.num_replicas = num_replicas
        self.rank = rank
        self.epoch = 0
        # Samples per replica, rounded up so every replica gets the same count.
        self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
        self.total_size = self.num_samples * self.num_replicas
        self.shuffle = shuffle

    def __iter__(self):
        if self.shuffle:
            # deterministically shuffle based on epoch
            g = torch.Generator()
            g.manual_seed(self.epoch)
            indices = torch.randperm(len(self.dataset), generator=g).tolist()
        else:
            indices = torch.arange(len(self.dataset)).tolist()

        # Add extra samples to make the list evenly divisible. The padding can
        # exceed the dataset size (e.g. 1 sample, 3 replicas), so the index
        # list is cycled as often as needed instead of sliced only once.
        padding = self.total_size - len(indices)
        if padding > 0:
            repeats = int(math.ceil(padding * 1.0 / len(indices)))
            indices += (indices * repeats)[:padding]
        assert len(indices) == self.total_size

        # subsample: this replica's contiguous slice
        offset = self.num_samples * self.rank
        indices = indices[offset : offset + self.num_samples]
        assert len(indices) == self.num_samples

        return iter(indices)

    def __len__(self):
        return self.num_samples

    def set_epoch(self, epoch):
        # The epoch is the shuffle seed; all replicas must set the same value.
        self.epoch = epoch
class IterationBasedBatchSampler(BatchSampler):
    """
    Wraps a BatchSampler, resampling from it until
    a specified number of iterations have been sampled

    Arguments:
        batch_sampler: iterable of batches exposing a ``sampler`` attribute.
        num_iterations: total number of batches to reach, counted from 0.
        start_iter: number of batches considered already consumed, so only
            ``num_iterations - start_iter`` batches are yielded.
    """

    def __init__(self, batch_sampler, num_iterations, start_iter=0):
        # BatchSampler.__init__ is not invoked; all iteration is delegated to
        # the wrapped batch_sampler.
        self.batch_sampler = batch_sampler
        self.num_iterations = num_iterations
        self.start_iter = start_iter

    def __iter__(self):
        """Yield batches until ``num_iterations`` has been reached.

        Raises:
            ValueError: if a full pass over the wrapped batch sampler yields
                no batch while iterations are still outstanding (previously
                this case looped forever).
        """
        iteration = self.start_iter
        # Strict comparison: once the budget is reached no further pass over
        # the wrapped sampler is started.
        while iteration < self.num_iterations:
            # if the underlying sampler has a set_epoch method, like
            # DistributedSampler, used for making each process see
            # a different split of the dataset, then set it
            sampler = self.batch_sampler.sampler
            if hasattr(sampler, "set_epoch"):
                sampler.set_epoch(iteration)
            yielded_any = False
            for batch in self.batch_sampler:
                iteration += 1
                yielded_any = True
                yield batch
                if iteration >= self.num_iterations:
                    break
            if not yielded_any:
                raise ValueError(
                    "batch_sampler produced no batches; cannot reach "
                    "{} iterations".format(self.num_iterations)
                )

    def __len__(self):
        return self.num_iterations
def build_transforms(cfg, is_train=True):
    """Build the image/target transform pipeline described by ``cfg.INPUT``.

    The pipeline is Resize -> RandomHorizontalFlip -> ToTensor -> Normalize.
    In training mode the minimum size is either ``MIN_SIZE_TRAIN`` or, when
    ``MIN_SIZE_RANGE_TRAIN`` does not start with -1, every integer in that
    inclusive range; flipping happens with probability 0.5. In test mode the
    test sizes are used and flipping is disabled.
    """
    input_cfg = cfg.INPUT

    if not is_train:
        min_size, max_size, flip_prob = (
            input_cfg.MIN_SIZE_TEST,
            input_cfg.MAX_SIZE_TEST,
            0,
        )
    else:
        size_range = input_cfg.MIN_SIZE_RANGE_TRAIN
        if size_range[0] == -1:
            # A leading -1 selects the fixed MIN_SIZE_TRAIN setting.
            min_size = input_cfg.MIN_SIZE_TRAIN
        else:
            assert len(size_range) == 2, \
                "MIN_SIZE_RANGE_TRAIN must have two elements (lower bound, upper bound)"
            lower, upper = size_range[0], size_range[1]
            min_size = list(range(lower, upper + 1))
        max_size = input_cfg.MAX_SIZE_TRAIN
        flip_prob = 0.5  # cfg.INPUT.FLIP_PROB_TRAIN

    to_bgr255 = input_cfg.TO_BGR255
    normalize_transform = T.Normalize(
        mean=input_cfg.PIXEL_MEAN, std=input_cfg.PIXEL_STD, to_bgr255=to_bgr255
    )

    steps = [
        T.Resize(min_size, max_size),
        T.RandomHorizontalFlip(flip_prob),
        T.ToTensor(),
        normalize_transform,
    ]
    return T.Compose(steps)
2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d 5 | from .misc import Conv2d 6 | from .misc import DFConv2d 7 | from .misc import ConvTranspose2d 8 | from .misc import BatchNorm2d 9 | from .misc import interpolate 10 | from .nms import nms, ml_nms 11 | from .roi_align import ROIAlign 12 | from .roi_align import roi_align 13 | from .roi_pool import ROIPool 14 | from .roi_pool import roi_pool 15 | from .smooth_l1_loss import smooth_l1_loss 16 | from .sigmoid_focal_loss import SigmoidFocalLoss 17 | from .iou_loss import IOULoss 18 | from .scale import Scale 19 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 20 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, \ 21 | ModulatedDeformConvPack 22 | from .dcn.deform_pool_func import deform_roi_pooling 23 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, \ 24 | ModulatedDeformRoIPoolingPack 25 | 26 | 27 | __all__ = [ 28 | "nms", 29 | "ml_nms", 30 | "roi_align", 31 | "ROIAlign", 32 | "roi_pool", 33 | "ROIPool", 34 | "smooth_l1_loss", 35 | "Conv2d", 36 | "DFConv2d", 37 | "ConvTranspose2d", 38 | "interpolate", 39 | "BatchNorm2d", 40 | "FrozenBatchNorm2d", 41 | "SigmoidFocalLoss", 42 | 'deform_conv', 43 | 'modulated_deform_conv', 44 | 'DeformConv', 45 | 'ModulatedDeformConv', 46 | 'ModulatedDeformConvPack', 47 | 'deform_roi_pooling', 48 | 'DeformRoIPooling', 49 | 'DeformRoIPoolingPack', 50 | 'ModulatedDeformRoIPoolingPack', 51 | "IOULoss", 52 | "Scale" 53 | ] 54 | -------------------------------------------------------------------------------- /fcos_core/layers/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
def _load_C_extensions():
    """Build and load the extension from the sibling ``csrc`` directory.

    Collects ``csrc/*.cpp`` and ``csrc/cpu/*.cpp``; when CUDA is usable
    (``torch.cuda.is_available()`` and ``CUDA_HOME`` is set) it also adds
    ``csrc/cuda/*.cu`` and defines ``WITH_CUDA``. The sources are handed to
    ``torch.utils.cpp_extension.load`` under the module name "torchvision".
    """
    package_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    csrc_dir = os.path.join(package_root, "csrc")

    def _find(*pattern):
        # csrc_dir is absolute, so the matches are absolute paths already.
        return glob.glob(os.path.join(csrc_dir, *pattern))

    sources = _find("*.cpp") + _find("cpu", "*.cpp")

    cflags = []
    if torch.cuda.is_available() and CUDA_HOME is not None:
        sources.extend(_find("cuda", "*.cu"))
        cflags = ["-DWITH_CUDA"]

    return load_ext(
        "torchvision",
        sources,
        extra_cflags=cflags,
        extra_include_paths=[csrc_dir],
    )
class FrozenBatchNorm2d(nn.Module):
    """
    BatchNorm2d with frozen statistics and affine terms.

    weight, bias, running_mean and running_var are registered as buffers, so
    they are part of the state dict but are not returned by parameters().
    """

    def __init__(self, n):
        super(FrozenBatchNorm2d, self).__init__()
        # Identity initialisation: unit scale/variance, zero shift/mean.
        initial_buffers = (
            ("weight", torch.ones),
            ("bias", torch.zeros),
            ("running_mean", torch.zeros),
            ("running_var", torch.ones),
        )
        for buffer_name, make in initial_buffers:
            self.register_buffer(buffer_name, make(n))

    def forward(self, x):
        # Fold the normalisation into one per-channel multiply-add:
        # y = x * (w / sqrt(var)) + (b - mean * w / sqrt(var))
        channel_scale = self.weight * self.running_var.rsqrt()
        channel_shift = self.bias - self.running_mean * channel_scale
        # Reshape to (1, C, 1, 1) so both broadcast over a 4-D input.
        return x * channel_scale.reshape(1, -1, 1, 1) + channel_shift.reshape(1, -1, 1, 1)
trans_std 33 | 34 | assert 0.0 <= ctx.trans_std <= 1.0 35 | if not data.is_cuda: 36 | raise NotImplementedError 37 | 38 | n = rois.shape[0] 39 | output = data.new_empty(n, out_channels, out_size, out_size) 40 | output_count = data.new_empty(n, out_channels, out_size, out_size) 41 | _C.deform_psroi_pooling_forward( 42 | data, 43 | rois, 44 | offset, 45 | output, 46 | output_count, 47 | ctx.no_trans, 48 | ctx.spatial_scale, 49 | ctx.out_channels, 50 | ctx.group_size, 51 | ctx.out_size, 52 | ctx.part_size, 53 | ctx.sample_per_part, 54 | ctx.trans_std 55 | ) 56 | 57 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 58 | ctx.save_for_backward(data, rois, offset) 59 | ctx.output_count = output_count 60 | 61 | return output 62 | 63 | @staticmethod 64 | @once_differentiable 65 | def backward(ctx, grad_output): 66 | if not grad_output.is_cuda: 67 | raise NotImplementedError 68 | 69 | data, rois, offset = ctx.saved_tensors 70 | output_count = ctx.output_count 71 | grad_input = torch.zeros_like(data) 72 | grad_rois = None 73 | grad_offset = torch.zeros_like(offset) 74 | 75 | _C.deform_psroi_pooling_backward( 76 | grad_output, 77 | data, 78 | rois, 79 | offset, 80 | output_count, 81 | grad_input, 82 | grad_offset, 83 | ctx.no_trans, 84 | ctx.spatial_scale, 85 | ctx.out_channels, 86 | ctx.group_size, 87 | ctx.out_size, 88 | ctx.part_size, 89 | ctx.sample_per_part, 90 | ctx.trans_std 91 | ) 92 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None) 93 | 94 | 95 | deform_roi_pooling = DeformRoIPoolingFunction.apply 96 | -------------------------------------------------------------------------------- /fcos_core/layers/iou_loss.py: -------------------------------------------------------------------------------- 1 | # GIoU and Linear IoU are added by following 2 | # https://github.com/yqyao/FCOS_PLUS/blob/master/maskrcnn_benchmark/layers/iou_loss.py. 
class IOULoss(nn.Module):
    """
    IoU-style loss between predicted and target box extents.

    Columns 0..3 of ``pred`` and ``target`` are read as left, top, right and
    bottom extents (presumably distances from a location to the box sides;
    confirm against the caller). ``loss_type`` picks the variant:

    * ``"iou"``        -> ``-log(IoU)``
    * ``"linear_iou"`` -> ``1 - IoU``
    * ``"giou"``       -> ``1 - GIoU``

    The per-row losses are summed; no averaging is applied.
    """

    def __init__(self, loss_type="iou"):
        super(IOULoss, self).__init__()
        self.loss_type = loss_type

    def forward(self, pred, target, weight=None):
        p_left, p_top, p_right, p_bottom = (pred[:, col] for col in range(4))
        t_left, t_top, t_right, t_bottom = (target[:, col] for col in range(4))

        t_area = (t_left + t_right) * (t_top + t_bottom)
        p_area = (p_left + p_right) * (p_top + p_bottom)

        # Overlap rectangle (min of extents) and enclosing rectangle (max).
        overlap_w = torch.min(p_left, t_left) + torch.min(p_right, t_right)
        hull_w = torch.max(p_left, t_left) + torch.max(p_right, t_right)
        overlap_h = torch.min(p_bottom, t_bottom) + torch.min(p_top, t_top)
        hull_h = torch.max(p_bottom, t_bottom) + torch.max(p_top, t_top)

        # Small epsilon keeps the GIoU denominator non-zero.
        hull_area = hull_w * hull_h + 1e-7
        overlap_area = overlap_w * overlap_h
        union_area = t_area + p_area - overlap_area

        # The +1.0 smoothing keeps the ratio finite for empty boxes.
        ious = (overlap_area + 1.0) / (union_area + 1.0)
        gious = ious - (hull_area - union_area) / hull_area

        variant = self.loss_type
        if variant == 'giou':
            losses = 1 - gious
        elif variant == 'linear_iou':
            losses = 1 - ious
        elif variant == 'iou':
            losses = -torch.log(ious)
        else:
            raise NotImplementedError

        # Weights are ignored when absent or when they do not sum to a
        # positive value; the plain sum is returned in that case.
        if weight is None or not (weight.sum() > 0):
            assert losses.numel() != 0
            return losses.sum()
        return (losses * weight).sum()
forward(self, pred, target, weight=None): 7 | pred_left = pred[:, 0] 8 | pred_top = pred[:, 1] 9 | pred_right = pred[:, 2] 10 | pred_bottom = pred[:, 3] 11 | 12 | target_left = target[:, 0] 13 | target_top = target[:, 1] 14 | target_right = target[:, 2] 15 | target_bottom = target[:, 3] 16 | 17 | target_aera = (target_left + target_right) * \ 18 | (target_top + target_bottom) 19 | pred_aera = (pred_left + pred_right) * \ 20 | (pred_top + pred_bottom) 21 | 22 | w_intersect = torch.min(pred_left, target_left) + \ 23 | torch.min(pred_right, target_right) 24 | h_intersect = torch.min(pred_bottom, target_bottom) + \ 25 | torch.min(pred_top, target_top) 26 | 27 | area_intersect = w_intersect * h_intersect 28 | area_union = target_aera + pred_aera - area_intersect 29 | 30 | losses = -torch.log((area_intersect + 1.0) / (area_union + 1.0)) 31 | 32 | if weight is not None and weight.sum() > 0: 33 | return (losses * weight).sum() / weight.sum() 34 | else: 35 | assert losses.numel() != 0 36 | return losses.mean() 37 | -------------------------------------------------------------------------------- /fcos_core/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from fcos_core import _C 4 | 5 | nms = _C.nms 6 | ml_nms = _C.ml_nms 7 | # nms.__doc__ = """ 8 | # This function performs Non-maximum suppresion""" 9 | -------------------------------------------------------------------------------- /fcos_core/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
class _ROIAlign(Function):
    """
    Autograd ``Function`` that forwards to ``_C.roi_align_forward`` and
    ``_C.roi_align_backward`` from the ``fcos_core._C`` extension.
    """

    @staticmethod
    def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio):
        """
        Run RoIAlign through the extension and stash what backward needs.

        Arguments:
            input: feature tensor; its size is unpacked as (bs, ch, h, w)
                in backward.
            roi: regions of interest, passed through to the extension.
            output_size (int or pair): pooled output size (height, width).
            spatial_scale: passed through to the extension.
            sampling_ratio: passed through to the extension.
        """
        # Normalize first so that a bare int (e.g. 7) works; previously the
        # raw argument was indexed and an int raised TypeError even though
        # the normalized pair was stored on ctx.
        output_size = _pair(output_size)
        ctx.save_for_backward(roi)
        ctx.output_size = output_size
        ctx.spatial_scale = spatial_scale
        ctx.sampling_ratio = sampling_ratio
        ctx.input_shape = input.size()
        output = _C.roi_align_forward(
            input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio
        )
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input``; other arguments get ``None``."""
        rois, = ctx.saved_tensors
        output_size = ctx.output_size
        spatial_scale = ctx.spatial_scale
        sampling_ratio = ctx.sampling_ratio
        bs, ch, h, w = ctx.input_shape
        grad_input = _C.roi_align_backward(
            grad_output,
            rois,
            spatial_scale,
            output_size[0],
            output_size[1],
            bs,
            ch,
            h,
            w,
            sampling_ratio,
        )
        # One entry per forward argument after ctx.
        return grad_input, None, None, None, None
class _ROIPool(Function):
    """
    Autograd ``Function`` that forwards to ``_C.roi_pool_forward`` and
    ``_C.roi_pool_backward`` from the ``fcos_core._C`` extension.
    """

    @staticmethod
    def forward(ctx, input, roi, output_size, spatial_scale):
        """
        Run RoIPool through the extension and save tensors for backward.

        Arguments:
            input: feature tensor; its size is unpacked as (bs, ch, h, w)
                in backward.
            roi: regions of interest, passed through to the extension.
            output_size (int or pair): pooled output size (height, width).
            spatial_scale: passed through to the extension.
        """
        # Normalize first so that a bare int works; previously the raw
        # argument was indexed and an int raised TypeError even though the
        # normalized pair was stored on ctx.
        output_size = _pair(output_size)
        ctx.output_size = output_size
        ctx.spatial_scale = spatial_scale
        ctx.input_shape = input.size()
        output, argmax = _C.roi_pool_forward(
            input, roi, spatial_scale, output_size[0], output_size[1]
        )
        # argmax is produced by the forward kernel and consumed by backward.
        ctx.save_for_backward(input, roi, argmax)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input``; other arguments get ``None``."""
        input, rois, argmax = ctx.saved_tensors
        output_size = ctx.output_size
        spatial_scale = ctx.spatial_scale
        bs, ch, h, w = ctx.input_shape
        grad_input = _C.roi_pool_backward(
            grad_output,
            input,
            rois,
            argmax,
            spatial_scale,
            output_size[0],
            output_size[1],
            bs,
            ch,
            h,
            w,
        )
        # One entry per forward argument after ctx.
        return grad_input, None, None, None
def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha):
    """
    Pure-PyTorch sigmoid focal loss (the fallback used for non-CUDA logits).

    Arguments:
        logits: per-class scores; ``logits.shape[1]`` is the number of classes.
            Assumed to be (N, num_classes) -- confirm against the caller.
        targets: class labels, assumed (N,). Column ``c`` (0-based) is the
            positive class for label ``c + 1``; label 0 makes every column a
            negative; negative labels contribute zero loss.
        gamma: focusing exponent, as a plain number or a one-element
            list/tuple/tensor.
        alpha: weight of the positive term (negatives get ``1 - alpha``),
            in the same accepted forms as ``gamma``.

    Returns:
        Unreduced loss tensor with one value per (row, class).
    """

    def _scalar(value):
        # The original indexed gamma[0] / alpha[0] unconditionally, which
        # raises TypeError for plain floats. Unwrap only containers.
        if isinstance(value, (list, tuple)):
            return value[0]
        if torch.is_tensor(value) and value.dim() > 0:
            return value[0]
        return value

    num_classes = logits.shape[1]
    gamma = _scalar(gamma)
    alpha = _scalar(alpha)
    dtype = targets.dtype
    device = targets.device
    # Labels are 1-based: row vector [1, ..., num_classes] to compare against.
    class_range = torch.arange(1, num_classes + 1, dtype=dtype, device=device).unsqueeze(0)

    t = targets.unsqueeze(1)
    p = torch.sigmoid(logits)
    term1 = (1 - p) ** gamma * torch.log(p)  # positive-class term
    term2 = p ** gamma * torch.log(1 - p)  # negative-class term
    positive = (t == class_range).float()
    # Negatives exclude ignored rows (label < 0).
    negative = ((t != class_range) * (t >= 0)).float()
    return -positive * term1 * alpha - negative * term2 * (1 - alpha)
class BalancedPositiveNegativeSampler(object):
    """
    Randomly picks a bounded number of positive and negative elements per
    image, capping positives at a fixed fraction of the per-image budget.
    """

    def __init__(self, batch_size_per_image, positive_fraction):
        """
        Arguments:
            batch_size_per_image (int): number of elements to be selected per image
            positive_fraction (float): fraction of the budget reserved for positives
        """
        self.batch_size_per_image = batch_size_per_image
        self.positive_fraction = positive_fraction

    def __call__(self, matched_idxs):
        """
        Arguments:
            matched_idxs: list of tensors, one per image. Entries >= 1 are
                positives, entries == 0 are negatives, and anything else
                (e.g. -1) is never selected.

        Returns:
            pos_idx (list[tensor])
            neg_idx (list[tensor])

            Two lists of uint8 masks, one mask per image and shaped like the
            corresponding input tensor. The first list marks the selected
            positives, the second the selected negatives.
        """
        pos_masks, neg_masks = [], []
        for labels in matched_idxs:
            pos_candidates = torch.nonzero(labels >= 1).squeeze(1)
            neg_candidates = torch.nonzero(labels == 0).squeeze(1)

            # Positives are limited by both the quota and what is available;
            # negatives fill the rest of the budget, again capped by supply.
            pos_quota = int(self.batch_size_per_image * self.positive_fraction)
            n_pos = min(pos_candidates.numel(), pos_quota)
            n_neg = min(neg_candidates.numel(), self.batch_size_per_image - n_pos)

            # Random subset of each pool (positives are shuffled first).
            pos_order = torch.randperm(
                pos_candidates.numel(), device=pos_candidates.device
            )[:n_pos]
            neg_order = torch.randperm(
                neg_candidates.numel(), device=neg_candidates.device
            )[:n_neg]
            chosen_pos = pos_candidates[pos_order]
            chosen_neg = neg_candidates[neg_order]

            # Scatter the chosen indices into 0/1 masks.
            pos_mask = torch.zeros_like(labels, dtype=torch.uint8)
            neg_mask = torch.zeros_like(labels, dtype=torch.uint8)
            pos_mask[chosen_pos] = 1
            neg_mask[chosen_neg] = 1

            pos_masks.append(pos_mask)
            neg_masks.append(neg_mask)

        return pos_masks, neg_masks
def build_detection_model(cfg):
    """
    Build the detector named by ``cfg.MODEL.META_ARCHITECTURE``.

    The name is looked up in ``_DETECTION_META_ARCHITECTURES`` (an unknown
    name raises ``KeyError``) and the class found is called with ``cfg``.
    """
    arch_name = cfg.MODEL.META_ARCHITECTURE
    detector_cls = _DETECTION_META_ARCHITECTURES[arch_name]
    return detector_cls(cfg)
@registry.ROI_BOX_PREDICTOR.register("FastRCNNPredictor")
class FastRCNNPredictor(nn.Module):
    """
    Box predictor that average-pools the feature map to 1x1 and applies two
    linear layers: one for class logits, one for box regression outputs.
    """

    def __init__(self, config, in_channels):
        super(FastRCNNPredictor, self).__init__()
        assert in_channels is not None

        class_count = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        # Class-agnostic regression uses 2 box sets instead of one per class.
        if config.MODEL.CLS_AGNOSTIC_BBOX_REG:
            box_sets = 2
        else:
            box_sets = class_count

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.cls_score = nn.Linear(in_channels, class_count)
        self.bbox_pred = nn.Linear(in_channels, box_sets * 4)

        # Small-std normal weights and zero biases; the classifier is
        # initialized before the regressor.
        for layer, std in ((self.cls_score, 0.01), (self.bbox_pred, 0.001)):
            nn.init.normal_(layer.weight, mean=0, std=std)
            nn.init.constant_(layer.bias, 0)

    def forward(self, x):
        pooled = self.avgpool(x)
        flat = pooled.view(pooled.size(0), -1)
        return self.cls_score(flat), self.bbox_pred(flat)
def make_roi_box_predictor(cfg, in_channels):
    """
    Build the box predictor registered under ``cfg.MODEL.ROI_BOX_HEAD.PREDICTOR``.

    The registered callable is invoked as ``predictor(cfg, in_channels)``.
    """
    predictor_name = cfg.MODEL.ROI_BOX_HEAD.PREDICTOR
    return registry.ROI_BOX_PREDICTOR[predictor_name](cfg, in_channels)
@registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor")
class KeypointRCNNFeatureExtractor(nn.Module):
    """
    Pools per-proposal features and runs them through a stack of 3x3
    convolutions, each followed by ReLU.

    ``out_channels`` is the channel count of the last convolution, or
    ``in_channels`` when ``cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS`` is empty.
    """

    def __init__(self, cfg, in_channels):
        super(KeypointRCNNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        input_features = in_channels
        layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS
        next_feature = input_features
        # Names of the conv sub-modules, in the order forward applies them.
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "conv_fcn{}".format(layer_idx)
            module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1)
            nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
            nn.init.constant_(module.bias, 0)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        # next_feature equals the last layer's width, and stays equal to
        # in_channels when no layers are configured. The loop variable used
        # previously is unbound in that case.
        self.out_channels = next_feature

    def forward(self, x, proposals):
        x = self.pooler(x, proposals)
        for layer_name in self.blocks:
            x = F.relu(getattr(self, layer_name)(x))
        return x
def make_roi_keypoint_predictor(cfg, in_channels):
    """
    Build the keypoint predictor registered under
    ``cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR``.

    The registered callable is invoked as ``predictor(cfg, in_channels)``.
    """
    predictor_name = cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR
    return registry.ROI_KEYPOINT_PREDICTOR[predictor_name](cfg, in_channels)
@registry.ROI_MASK_PREDICTOR.register("MaskRCNNC4Predictor")
class MaskRCNNC4Predictor(nn.Module):
    """
    Mask predictor: a stride-2 transposed convolution with ReLU, followed by
    a 1x1 convolution producing one logit map per class.
    """

    def __init__(self, cfg, in_channels):
        super(MaskRCNNC4Predictor, self).__init__()
        class_count = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        # The last configured conv width is the hidden channel count.
        hidden_channels = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        self.conv5_mask = ConvTranspose2d(in_channels, hidden_channels, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(hidden_channels, class_count, 1, 1, 0)

        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")

    def forward(self, x):
        upsampled = F.relu(self.conv5_mask(x))
        return self.mask_fcn_logits(upsampled)
def permute_and_flatten(layer, N, A, C, H, W):
    """
    Rearrange a (N, A*C, H, W) prediction map into (N, H*W*A, C).

    The channel axis is split into (-1, C), the spatial axes are moved in
    front of it, and everything between the batch axis and ``C`` is merged.
    ``A`` is accepted for signature compatibility; the split infers it.
    """
    return (
        layer.view(N, -1, C, H, W)
        .permute(0, 3, 4, 1, 2)
        .reshape(N, -1, C)
    )
def cat(tensors, dim=0):
    """
    Concatenate ``tensors`` along ``dim`` like ``torch.cat``, but return the
    single element itself (no copy) when the list or tuple has exactly one.
    """
    assert isinstance(tensors, (list, tuple))
    if len(tensors) != 1:
        return torch.cat(tensors, dim)
    (only,) = tensors
    return only
def make_optimizer(cfg, model):
    """Build an SGD optimizer with one parameter group per trainable parameter.

    Every parameter yielded by ``model.named_parameters()`` that has
    ``requires_grad`` set gets its own group with an explicit ``lr`` and
    ``weight_decay``:

    * the starting lr is ``cfg.SOLVER.BASE_LR``; when
      ``cfg.MODEL.FAD.USE_CHANNEL_LR`` is enabled and the parameter name
      contains ``cls_tower`` / ``box_tower``, it is scaled by the entry of
      ``CHANNEL_LR_CLS`` / ``CHANNEL_LR_BOX`` selected by the third number
      parsed from the ``dag.<d>.<d>.<d>.<d>`` part of the name;
    * names containing ``bias`` use ``BIAS_LR_FACTOR`` and
      ``WEIGHT_DECAY_BIAS``;
    * names ending in ``.offset.weight`` / ``.offset.bias`` are additionally
      scaled by ``DCONV_OFFSETS_LR_FACTOR``.

    Arguments:
        cfg: config node exposing the ``SOLVER`` and ``MODEL.FAD`` fields
            read below
        model: object exposing ``named_parameters()``

    Returns:
        torch.optim.SGD: optimizer whose default lr is ``cfg.SOLVER.BASE_LR``

    Raises:
        ValueError: raised by ``torch.optim.SGD`` when no parameter is
            trainable (empty parameter list)
    """
    logger = logging.getLogger("fcos_core.trainer")
    base_lr = cfg.SOLVER.BASE_LR
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue

        lr = base_lr
        if cfg.MODEL.FAD.USE_CHANNEL_LR and ("box_tower" in key or "cls_tower" in key):
            assert len(cfg.MODEL.FAD.CHANNEL_LIST_CLS) == len(cfg.MODEL.FAD.CHANNEL_LR_CLS)
            assert len(cfg.MODEL.FAD.CHANNEL_LIST_BOX) == len(cfg.MODEL.FAD.CHANNEL_LR_BOX)
            candidate_lrs = cfg.MODEL.FAD.CHANNEL_LR_CLS if "cls_tower" in key \
                else cfg.MODEL.FAD.CHANNEL_LR_BOX
            # NOTE(review): the dots are unescaped and each group is a single
            # digit, so indices >= 10 would not parse -- confirm against the
            # naming used by the FAD towers before tightening this pattern.
            info = re.findall(r"dag.(\d).(\d).(\d).(\d)", key)
            assert len(info) == 1 and len(info[0]) == 4
            # get channel index
            channel_idx = int(info[0][2])
            lr = base_lr * candidate_lrs[channel_idx]

        weight_decay = cfg.SOLVER.WEIGHT_DECAY
        if "bias" in key:
            lr *= cfg.SOLVER.BIAS_LR_FACTOR
            weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
        if key.endswith(".offset.weight") or key.endswith(".offset.bias"):
            logger.info("set lr factor of {} as {}".format(
                key, cfg.SOLVER.DCONV_OFFSETS_LR_FACTOR
            ))
            lr *= cfg.SOLVER.DCONV_OFFSETS_LR_FACTOR
        params.append({"params": [value], "lr": lr, "weight_decay": weight_decay})

    # The optimizer-wide default must not depend on the loop variable: that
    # would be whatever lr the last parameter happened to get, and would be
    # undefined when the loop never reached the lr assignment.
    optimizer = torch.optim.SGD(params, base_lr, momentum=cfg.SOLVER.MOMENTUM)
    return optimizer
# FIXME ideally this would be achieved with a CombinedLRScheduler,
# separating MultiStepLR with WarmupLR
# but the current LRScheduler design doesn't allow it
class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    """Multi-step learning-rate decay preceded by a warmup phase.

    While ``last_epoch < warmup_iters`` each base lr is multiplied by a warmup
    factor: the constant ``warmup_factor`` for ``"constant"`` warmup, or a
    linear blend from ``warmup_factor`` towards 1 for ``"linear"`` warmup.
    Independently of warmup, the lr is multiplied by ``gamma`` once for every
    milestone that is ``<= last_epoch``.

    Arguments:
        optimizer: wrapped optimizer
        milestones: increasing sequence of step indices
        gamma (float): multiplicative decay applied per passed milestone
        warmup_factor (float): lr multiplier at the start of warmup
        warmup_iters (int): number of warmup steps
        warmup_method (str): ``"constant"`` or ``"linear"``
        last_epoch (int): forwarded to the base scheduler

    Raises:
        ValueError: if ``milestones`` is not sorted in increasing order or
            ``warmup_method`` is not one of the accepted names
    """

    def __init__(
        self,
        optimizer,
        milestones,
        gamma=0.1,
        warmup_factor=1.0 / 3,
        warmup_iters=500,
        warmup_method="linear",
        last_epoch=-1,
    ):
        if not list(milestones) == sorted(milestones):
            # format the offending value into the message instead of passing
            # it as a second exception argument
            raise ValueError(
                "Milestones should be a list of"
                " increasing integers. Got {}".format(milestones)
            )

        if warmup_method not in ("constant", "linear"):
            raise ValueError(
                "Only 'constant' or 'linear' warmup_method accepted, "
                "got {}".format(warmup_method)
            )
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the learning rate of every param group for ``last_epoch``."""
        warmup_factor = 1
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == "constant":
                warmup_factor = self.warmup_factor
            elif self.warmup_method == "linear":
                # alpha goes from 0 to 1 over the warmup window
                alpha = float(self.last_epoch) / self.warmup_iters
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
        # bisect_right counts the milestones that have already been reached
        return [
            base_lr
            * warmup_factor
            * self.gamma ** bisect_right(self.milestones, self.last_epoch)
            for base_lr in self.base_lrs
        ]
def to_image_list(tensors, size_divisible=0):
    """
    Convert ``tensors`` into an ImageList.

    ``tensors`` can be an ImageList (returned unchanged), a torch.Tensor or
    a tuple/list of Tensors. It can't be a numpy array.
    A list of Tensors is zero-padded to a common shape; when
    ``size_divisible`` is positive, axes 1 and 2 of that common shape are
    rounded up to a multiple of it.

    Raises:
        TypeError: for any other input type
    """
    if isinstance(tensors, torch.Tensor) and size_divisible > 0:
        # Route a bare Tensor through the list path so it gets padded.
        # NOTE(review): the padding below indexes axes 1 and 2, which looks
        # like it expects 3-D images here -- confirm how callers pass an
        # already batched 4-D Tensor together with size_divisible > 0.
        tensors = [tensors]

    if isinstance(tensors, ImageList):
        return tensors

    if isinstance(tensors, torch.Tensor):
        # single tensor shape can be inferred
        batch = tensors[None] if tensors.dim() == 3 else tensors
        assert batch.dim() == 4
        return ImageList(batch, [image.shape[-2:] for image in batch])

    if not isinstance(tensors, (tuple, list)):
        raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors)))

    # element-wise maximum over the shapes of all images
    padded_dims = [max(extent) for extent in zip(*[img.shape for img in tensors])]

    # TODO Ideally, just remove this and let the model handle arbitrary
    # input sizes
    if size_divisible > 0:
        import math

        for axis in (1, 2):
            padded_dims[axis] = int(
                math.ceil(padded_dims[axis] / size_divisible) * size_divisible
            )

    batch_shape = (len(tensors),) + tuple(padded_dims)
    batched_imgs = tensors[0].new(*batch_shape).zero_()
    for source, target in zip(tensors, batched_imgs):
        # copy each image into the top-left corner of its zeroed slot
        target[: source.shape[0], : source.shape[1], : source.shape[2]].copy_(source)

    return ImageList(batched_imgs, [img.shape[-2:] for img in tensors])
def findContours(*args, **kwargs):
    """
    Call cv2.findContours and return the same pair on OpenCV 3 and 4.

    OpenCV 4 returns two values and OpenCV 3 returns three; in both cases
    only the contours and the hierarchy are handed back. All arguments are
    forwarded untouched.

    Returns:
        contours, hierarchy

    Raises:
        AssertionError: if the installed cv2 version string starts with
            neither '3' nor '4'
    """
    version = cv2.__version__
    if not version.startswith(('3', '4')):
        raise AssertionError(
            'cv2 must be either version 3 or 4 to call this method')

    found = cv2.findContours(*args, **kwargs)
    if version.startswith('4'):
        contours, hierarchy = found
    else:
        # version 3 puts an extra leading value in front of the pair
        _, contours, hierarchy = found

    return contours, hierarchy
def setup_custom_environment(custom_module_path):
    """Load custom environment setup from a Python source file and run the setup
    function.

    Arguments:
        custom_module_path: path of the Python source file to load; it must
            define a callable named ``setup_environment``
    """
    # The file is loaded under a fixed module name via the project helper.
    module = import_file("fcos_core.utils.env.custom_module", custom_module_path)
    # Fail with an explicit message if the module lacks the required hook.
    assert hasattr(module, "setup_environment") and callable(
        module.setup_environment
    ), (
        "Custom environment module defined in {} does not have the "
        "required callable attribute 'setup_environment'."
    ).format(
        custom_module_path
    )
    module.setup_environment()
import sys

# The interpreter version is read from ``sys`` directly: ``torch._six`` was a
# private compatibility shim and is not available in current PyTorch releases,
# so relying on it makes this module fail at import time.
if sys.version_info[0] >= 3:
    import importlib
    import importlib.util

    # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
    def import_file(module_name, file_path, make_importable=False):
        """Load the Python source at ``file_path`` as module ``module_name``.

        Arguments:
            module_name (str): name given to the created module
            file_path (str): path of the source file to execute
            make_importable (bool): when true, also register the module in
                ``sys.modules`` under ``module_name``

        Returns:
            the executed module object

        Raises:
            ImportError: if no import spec/loader can be built for the path
        """
        spec = importlib.util.spec_from_file_location(module_name, file_path)
        if spec is None or spec.loader is None:
            # e.g. a path whose suffix is not recognised as Python source
            raise ImportError(
                "Cannot load module {!r} from {!r}".format(module_name, file_path)
            )
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        if make_importable:
            sys.modules[module_name] = module
        return module
else:
    import imp

    def import_file(module_name, file_path, make_importable=None):
        """Python 2 fallback: load ``file_path`` with ``imp.load_source``."""
        module = imp.load_source(module_name, file_path)
        return module
def setup_logger(name, save_dir, distributed_rank, filename="log.txt"):
    """Configure and return the logger called ``name``.

    The logger level is set to DEBUG. For ``distributed_rank > 0`` no handler
    is attached (only the master process logs results). Otherwise a stdout
    handler is attached and, when ``save_dir`` is truthy, a file handler
    writing to ``save_dir/filename``.

    Calling this function again for the same logger does not attach a second
    copy of a handler that is already present, so messages are not emitted
    twice.

    Arguments:
        name (str): logger name
        save_dir (str): directory for the log file; falsy disables file logging
        distributed_rank (int): rank of the calling process
        filename (str): log file name inside ``save_dir``

    Returns:
        logging.Logger
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    # don't log results for the non-master process
    if distributed_rank > 0:
        return logger

    formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s")

    # exact type check: FileHandler is itself a StreamHandler subclass
    has_stdout_handler = any(
        type(h) is logging.StreamHandler and getattr(h, "stream", None) is sys.stdout
        for h in logger.handlers
    )
    if not has_stdout_handler:
        ch = logging.StreamHandler(stream=sys.stdout)
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    if save_dir:
        log_path = os.path.join(save_dir, filename)
        # FileHandler stores the absolute path of its target in baseFilename
        target = os.path.abspath(log_path)
        has_file_handler = any(
            isinstance(h, logging.FileHandler) and h.baseFilename == target
            for h in logger.handlers
        )
        if not has_file_handler:
            fh = logging.FileHandler(log_path)
            fh.setLevel(logging.DEBUG)
            fh.setFormatter(formatter)
            logger.addHandler(fh)

    return logger
class MetricLogger(object):
    """Collection of named meters that can be updated and printed together.

    Meters are created on first use through a ``defaultdict(SmoothedValue)``
    and are also reachable as attributes (``logger.loss``).
    """

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one new value to each meter named by a keyword argument.

        Tensor values are converted with ``.item()``; every value must then
        be a float or an int.
        """
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        """Resolve unknown attributes as meter names.

        Raises:
            AttributeError: if ``attr`` is neither a meter nor an instance
                attribute
        """
        # __getattr__ only runs after normal lookup failed. Read ``meters``
        # through __dict__: going through ``self.meters`` would re-enter this
        # method forever on an instance whose ``meters`` is not set yet
        # (e.g. while it is being rebuilt by copy or pickle).
        meters = self.__dict__.get("meters")
        if meters is not None and attr in meters:
            return meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        """Join ``name: median (global_avg)`` of every meter with the delimiter."""
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg)
            )
        return self.delimiter.join(loss_str)
def mkdir(path):
    """Create directory ``path`` (with parents); succeed if it already exists.

    Raises:
        OSError: for any failure other than "the directory already exists",
            including the case where ``path`` exists but is not a directory
    """
    try:
        os.makedirs(path)
    except OSError as e:
        # EEXIST is only harmless when the existing entry really is a
        # directory; an existing regular file must still be reported.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
def get_tensorboard_writer(output_dir):
    """Create a tensorboardX SummaryWriter below ``output_dir`` on the main process.

    The writer logs to ``<output_dir>/tb_logger/maskrcnn-<timestamp>`` where
    the timestamp is the current local time formatted as ``%Y%m%d-%H-%M``.

    Returns:
        the SummaryWriter on the main process, ``None`` on every other process

    Raises:
        ImportError: if tensorboardX cannot be imported
    """
    try:
        from tensorboardX import SummaryWriter
    except ImportError:
        raise ImportError(
            'To use tensorboard please install tensorboardX '
            '[ pip install tensorflow tensorboardX ].'
        )

    if not is_main_process():
        return None

    stamp = datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H-%M')
    run_dir = os.path.join(
        os.path.join(output_dir, "tb_logger"), 'maskrcnn-{}'.format(stamp))
    return SummaryWriter(run_dir)
class Timer(object):
    """Accumulate elapsed-time measurements and report their average.

    ``tic()`` records a start time and ``toc()`` adds the time elapsed since
    then; ``add()`` can be used to feed a duration directly.
    """

    def __init__(self):
        self.reset()

    @property
    def average_time(self):
        """Mean of all recorded durations, or 0.0 when nothing was recorded."""
        if self.calls > 0:
            return self.total_time / self.calls
        return 0.0

    def tic(self):
        # time.time is used rather than time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """Record the time since ``tic()``; return the average or this duration."""
        elapsed = time.time() - self.start_time
        self.add(elapsed)
        return self.average_time if average else self.diff

    def add(self, time_diff):
        """Record ``time_diff`` as one more measured duration."""
        self.diff = time_diff
        self.total_time += self.diff
        self.calls += 1

    def reset(self):
        """Clear every counter back to its initial value."""
        self.total_time = 0.0
        self.calls = 0
        self.start_time = 0.0
        self.diff = 0.0

    def avg_time_str(self):
        """Return the average duration formatted by ``datetime.timedelta``."""
        return str(datetime.timedelta(seconds=self.average_time))
def get_extensions():
    """Describe the ``fcos_core._C`` native extension for ``setup()``.

    C++ sources are collected from ``fcos_core/csrc`` and its ``cpu``
    sub-directory. When CUDA is usable (``torch.cuda.is_available()`` and
    ``CUDA_HOME`` set) or the environment variable ``FORCE_CUDA`` equals
    ``"1"``, the ``cuda/*.cu`` sources, the ``WITH_CUDA`` macro and the nvcc
    flags are added and a CUDAExtension is built instead of a CppExtension.

    Returns:
        list: a single-element list holding the extension object
    """
    csrc_dir = os.path.join("fcos_core", "csrc")

    def _collect(*pattern):
        # glob relative to the extension source directory
        return glob.glob(os.path.join(csrc_dir, *pattern))

    top_level_cpp = _collect("*.cpp")
    cpu_cpp = _collect("cpu", "*.cpp")
    cuda_cu = _collect("cuda", "*.cu")

    sources = top_level_cpp + cpu_cpp
    builder = CppExtension
    compile_args = {"cxx": []}
    macros = []

    cuda_usable = torch.cuda.is_available() and CUDA_HOME is not None
    cuda_forced = os.getenv("FORCE_CUDA", "0") == "1"
    if cuda_usable or cuda_forced:
        builder = CUDAExtension
        sources = sources + cuda_cu
        macros = macros + [("WITH_CUDA", None)]
        compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]

    return [
        builder(
            "fcos_core._C",
            sources,
            include_dirs=[csrc_dir],
            define_macros=macros,
            extra_compile_args=compile_args,
        )
    ]
def get_config_root_path():
    ''' Path to configs for unit tests '''
    # This file lives in root/tests/env_tests, so going up three path
    # components from the resolved file path (file -> env_tests -> tests)
    # reaches the repository root.
    location = os.path.abspath(os.path.realpath(__file__))
    for _ in range(3):
        location = os.path.dirname(location)
    return os.path.join(location, "configs")
# Use default config if config file is not specified 35 | cfg = copy.deepcopy(g_cfg) 36 | backbone = backbone_builder(cfg) 37 | 38 | # make sures the backbone has `out_channels` 39 | self.assertIsNotNone( 40 | getattr(backbone, 'out_channels', None), 41 | 'Need to provide out_channels for backbone {}'.format(name) 42 | ) 43 | 44 | N, C_in, H, W = 2, 3, 224, 256 45 | input = torch.rand([N, C_in, H, W], dtype=torch.float32) 46 | out = backbone(input) 47 | for cur_out in out: 48 | self.assertEqual( 49 | cur_out.shape[:2], 50 | torch.Size([N, backbone.out_channels]) 51 | ) 52 | 53 | 54 | if __name__ == "__main__": 55 | unittest.main() 56 | -------------------------------------------------------------------------------- /tests/test_configs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | import glob 5 | import os 6 | import utils 7 | 8 | 9 | class TestConfigs(unittest.TestCase): 10 | def test_configs_load(self): 11 | ''' Make sure configs are loadable ''' 12 | 13 | cfg_root_path = utils.get_config_root_path() 14 | files = glob.glob( 15 | os.path.join(cfg_root_path, "./**/*.yaml"), recursive=True) 16 | self.assertGreater(len(files), 0) 17 | 18 | for fn in files: 19 | print('Loading {}...'.format(fn)) 20 | utils.load_config_from_file(fn) 21 | 22 | 23 | if __name__ == "__main__": 24 | unittest.main() 25 | -------------------------------------------------------------------------------- /tests/test_metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
class TestMetricLogger(unittest.TestCase):
    """Unit tests for MetricLogger's update and attribute access."""

    def test_update(self):
        """Feeding 0.0 .. 9.0 yields the expected count, total, median and avg."""
        meter = MetricLogger()
        for i in range(10):
            meter.update(metric=float(i))

        m = meter.meters["metric"]
        self.assertEqual(m.count, 10)
        self.assertEqual(m.total, 45)
        # NOTE(review): 4 (not 4.5) is expected for the even-length series
        # 0..9 -- presumably the median implementation returns the lower of
        # the two middle values; confirm against SmoothedValue.median.
        self.assertEqual(m.median, 4)
        self.assertEqual(m.avg, 4.5)

    def test_no_attr(self):
        """Regular attributes resolve; an unknown name raises AttributeError."""
        meter = MetricLogger()
        _ = meter.meters
        _ = meter.delimiter
        def broken():
            _ = meter.not_existent
        self.assertRaises(AttributeError, broken)
class TestSegmentationMask(unittest.TestCase):
    """Compare the 'poly' and 'mask' forms of SegmentationMask.

    Every test builds the same annotation in both forms and checks that the
    summed absolute difference between their mask tensors stays below a
    fixed bound after an operation.
    """

    def __init__(self, method_name='runTest'):
        super(TestSegmentationMask, self).__init__(method_name)
        # one instance made of two polygons, each a flat list of coordinates
        # presumably interleaved as x0, y0, x1, y1, ... -- TODO confirm
        poly = [[[423.0, 306.5, 406.5, 277.0, 400.0, 271.5, 389.5, 277.0,
                  387.5, 292.0, 384.5, 295.0, 374.5, 220.0, 378.5, 210.0,
                  391.0, 200.5, 404.0, 199.5, 414.0, 203.5, 425.5, 221.0,
                  438.5, 297.0, 423.0, 306.5],
                 [100, 100, 200, 100, 200, 200, 100, 200],
                ]]
        width = 640
        height = 480
        size = width, height

        # P keeps the polygon form, M is the same data converted to a mask
        self.P = SegmentationMask(poly, size, 'poly')
        self.M = SegmentationMask(poly, size, 'poly').convert('mask')


    def L1(self, A, B):
        """Return the sum of absolute differences between two mask tensors."""
        diff = A.get_mask_tensor() - B.get_mask_tensor()
        diff = torch.sum(torch.abs(diff.float())).item()
        return diff


    def test_convert(self):
        """Round-tripping through the other form stays within the bound."""
        M_hat = self.M.convert('poly').convert('mask')
        P_hat = self.P.convert('mask').convert('poly')

        diff_mask = self.L1(self.M, M_hat)
        diff_poly = self.L1(self.P, P_hat)
        self.assertTrue(diff_mask == diff_poly)
        # NOTE(review): 8169 is an unexplained tolerance -- presumably the
        # value observed when the test was written; confirm its origin.
        self.assertTrue(diff_mask <= 8169.)
        self.assertTrue(diff_poly <= 8169.)


    def test_crop(self):
        """Cropping both forms with the same box gives near-identical masks."""
        box = [400, 250, 500, 300] # xyxy
        diff = self.L1(self.M.crop(box), self.P.crop(box))
        self.assertTrue(diff <= 1.)


    def test_resize(self):
        """Resizing changes the size of both forms consistently."""
        new_size = 50, 25
        M_hat = self.M.resize(new_size)
        P_hat = self.P.resize(new_size)
        diff = self.L1(M_hat, P_hat)

        self.assertTrue(self.M.size == self.P.size)
        self.assertTrue(M_hat.size == P_hat.size)
        self.assertTrue(self.M.size != M_hat.size)
        self.assertTrue(diff <= 255.)


    def test_transpose(self):
        """Flipping both forms keeps them within the bound for each direction."""
        # NOTE(review): local copies of the flip codes -- presumably they
        # mirror the constants used by SegmentationMask.transpose; confirm.
        FLIP_LEFT_RIGHT = 0
        FLIP_TOP_BOTTOM = 1
        diff_hor = self.L1(self.M.transpose(FLIP_LEFT_RIGHT),
                           self.P.transpose(FLIP_LEFT_RIGHT))

        diff_ver = self.L1(self.M.transpose(FLIP_TOP_BOTTOM),
                           self.P.transpose(FLIP_TOP_BOTTOM))

        self.assertTrue(diff_hor <= 53250.)
        self.assertTrue(diff_ver <= 42494.)
def load_config_from_file(file_path):
    ''' Load config from file path specified as absolute path

    A deep copy of the global default config is made first, so merging the
    file never modifies the shared ``g_cfg`` object.
    '''
    ret = copy.deepcopy(g_cfg)
    # merge_from_file updates ``ret`` in place
    ret.merge_from_file(file_path)
    return ret
, 'helpers' ) ) ) 10 | from csHelpers import * 11 | 12 | # Cityscapes imports 13 | from cityscapesscripts.evaluation.instance import * 14 | from cityscapesscripts.helpers.csHelpers import * 15 | import cv2 16 | from fcos_core.utils import cv2_util 17 | 18 | 19 | def instances2dict_with_polygons(imageFileList, verbose=False): 20 | imgCount = 0 21 | instanceDict = {} 22 | 23 | if not isinstance(imageFileList, list): 24 | imageFileList = [imageFileList] 25 | 26 | if verbose: 27 | print("Processing {} images...".format(len(imageFileList))) 28 | 29 | for imageFileName in imageFileList: 30 | # Load image 31 | img = Image.open(imageFileName) 32 | 33 | # Image as numpy array 34 | imgNp = np.array(img) 35 | 36 | # Initialize label categories 37 | instances = {} 38 | for label in labels: 39 | instances[label.name] = [] 40 | 41 | # Loop through all instance ids in instance image 42 | for instanceId in np.unique(imgNp): 43 | if instanceId < 1000: 44 | continue 45 | instanceObj = Instance(imgNp, instanceId) 46 | instanceObj_dict = instanceObj.toDict() 47 | 48 | #instances[id2label[instanceObj.labelID].name].append(instanceObj.toDict()) 49 | if id2label[instanceObj.labelID].hasInstances: 50 | mask = (imgNp == instanceId).astype(np.uint8) 51 | contour, hier = cv2_util.findContours( 52 | mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 53 | 54 | polygons = [c.reshape(-1).tolist() for c in contour] 55 | instanceObj_dict['contours'] = polygons 56 | 57 | instances[id2label[instanceObj.labelID].name].append(instanceObj_dict) 58 | 59 | imgKey = os.path.abspath(imageFileName) 60 | instanceDict[imgKey] = instances 61 | imgCount += 1 62 | 63 | if verbose: 64 | print("\rImages Processed: {}".format(imgCount), end=' ') 65 | sys.stdout.flush() 66 | 67 | if verbose: 68 | print("") 69 | 70 | return instanceDict 71 | 72 | def main(argv): 73 | fileList = [] 74 | if (len(argv) > 2): 75 | for arg in argv: 76 | if ("png" in arg): 77 | fileList.append(arg) 78 | 
instances2dict_with_polygons(fileList, True) 79 | 80 | if __name__ == "__main__": 81 | main(sys.argv[1:]) 82 | -------------------------------------------------------------------------------- /tools/remove_solver_states.py: -------------------------------------------------------------------------------- 1 | # Set up custom environment before nearly anything else is imported 2 | # NOTE: this should be the first import (do not reorder) 3 | from fcos_core.utils.env import setup_environment # noqa F401 isort:skip 4 | import argparse 5 | import os 6 | import torch 7 | 8 | 9 | def main(): 10 | parser = argparse.ArgumentParser(description="Remove the solver states stored in a trained model") 11 | parser.add_argument( 12 | "model", 13 | default="models/FCOS_R_50_FPN_1x.pth", 14 | help="path to the input model file", 15 | ) 16 | 17 | args = parser.parse_args() 18 | 19 | model = torch.load(args.model) 20 | del model["optimizer"] 21 | del model["scheduler"] 22 | del model["iteration"] 23 | 24 | filename_wo_ext, ext = os.path.splitext(args.model) 25 | output_file = filename_wo_ext + "_wo_solver_states" + ext 26 | torch.save(model, output_file) 27 | print("Done. The model without solver states is saved to {}".format(output_file)) 28 | 29 | if __name__ == "__main__": 30 | main() 31 | 32 | --------------------------------------------------------------------------------