├── .gitignore ├── .gitmodules ├── README.md ├── config.json ├── configs ├── caffe2 │ ├── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml │ └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml ├── dcn │ ├── README.md │ ├── e2e_faster_rcnn_dconv_R_50_FPN_1x.yaml │ ├── e2e_faster_rcnn_mdconv_R_50_FPN_1x.yaml │ ├── e2e_mask_rcnn_dconv_R_50_FPN_1x.yaml │ └── e2e_mask_rcnn_mdconv_R_50_FPN_1x.yaml ├── e2e_faster_rcnn_R_101_FPN_1x.yaml ├── e2e_faster_rcnn_R_50_C4_1x.yaml ├── e2e_faster_rcnn_R_50_FPN_1x.yaml ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml ├── e2e_faster_rcnn_fbnet.yaml ├── e2e_faster_rcnn_fbnet_600.yaml ├── e2e_faster_rcnn_fbnet_chamv1a_600.yaml ├── e2e_keypoint_rcnn_R_50_FPN_1x.yaml ├── e2e_mask_rcnn_R_101_FPN_1x.yaml ├── e2e_mask_rcnn_R_50_C4_1x.yaml ├── e2e_mask_rcnn_R_50_FPN_1x.yaml ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── e2e_mask_rcnn_fbnet.yaml ├── e2e_mask_rcnn_fbnet_600.yaml ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml ├── glide │ ├── common.yaml │ └── dota.yaml └── gn_baselines │ ├── README.md │ ├── e2e_faster_rcnn_R_50_FPN_1x_gn.yaml │ ├── e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ ├── e2e_mask_rcnn_R_50_FPN_1x_gn.yaml │ ├── e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml │ └── scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml ├── docker ├── Dockerfile └── docker-jupyter │ ├── Dockerfile │ └── jupyter_notebook_config.py ├── maskrcnn_benchmark ├── __init__.py ├── config │ ├── __init__.py │ ├── defaults.py │ └── paths_catalog.py ├── csrc │ ├── PSROIAlign.h │ ├── ROIAlign.h │ ├── ROIPool.h │ ├── SigmoidFocalLoss.h │ ├── cpu │ │ ├── ROIAlign_cpu.cpp │ │ ├── nms_cpu.cpp │ │ └── vision.h │ ├── cuda │ │ ├── PSROIAlign_cuda.cu │ │ ├── ROIAlign_cuda.cu │ │ ├── ROIPool_cuda.cu │ │ ├── SigmoidFocalLoss_cuda.cu │ │ ├── deform_conv_cuda.cu │ │ ├── deform_conv_kernel_cuda.cu │ │ ├── deform_pool_cuda.cu │ │ ├── deform_pool_kernel_cuda.cu │ │ ├── nms.cu │ │ └── vision.h │ ├── deform_conv.h │ ├── deform_pool.h │ ├── nms.h │ └── vision.cpp ├── data │ ├── README.md │ ├── __init__.py │ ├── build.py │ ├── collate_batch.py │ ├── datasets │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── concat_dataset.py │ │ ├── dota.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── coco │ │ │ │ ├── __init__.py │ │ │ │ └── coco_eval.py │ │ │ ├── dota │ │ │ │ ├── __init__.py │ │ │ │ └── dota_eval.py │ │ │ └── voc │ │ │ │ ├── __init__.py │ │ │ │ └── voc_eval.py │ │ ├── list_dataset.py │ │ └── voc.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── grouped_batch_sampler.py │ │ └── iteration_based_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── build.py │ │ └── transforms.py ├── engine │ ├── __init__.py │ ├── bbox_aug.py │ ├── inference.py │ └── trainer.py ├── layers │ ├── __init__.py │ ├── _utils.py │ ├── batch_norm.py │ ├── dcn │ │ ├── __init__.py │ │ ├── deform_conv_func.py │ │ ├── deform_conv_module.py │ │ ├── deform_pool_func.py │ │ └── deform_pool_module.py │ ├── misc.py │ ├── nms.py │ ├── psroi_align.py │ ├── roi_align.py │ ├── roi_pool.py │ ├── sigmoid_focal_loss.py │ └── smooth_l1_loss.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── fbnet.py │ │ ├── fbnet_builder.py │ │ ├── fbnet_modeldef.py │ │ ├── fpn.py │ │ └── resnet.py │ ├── balanced_positive_negative_sampler.py │ ├── box_coder.py │ ├── detector │ │ ├── __init__.py │ │ ├── detectors.py │ │ └── generalized_rcnn.py │ ├── make_layers.py │ ├── matcher.py │ ├── poolers.py │ ├── registry.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── 
box_head │ │ │ ├── __init__.py │ │ │ ├── box_head.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── ratio_head.py │ │ │ ├── roi_box_feature_extractors.py │ │ │ └── roi_box_predictors.py │ │ ├── keypoint_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── keypoint_head.py │ │ │ ├── loss.py │ │ │ ├── roi_keypoint_feature_extractors.py │ │ │ └── roi_keypoint_predictors.py │ │ ├── mask_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── mask_head.py │ │ │ ├── roi_mask_feature_extractors.py │ │ │ └── roi_mask_predictors.py │ │ └── roi_heads.py │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── inference.py │ │ ├── loss.py │ │ ├── retinanet │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ └── retinanet.py │ │ ├── rpn.py │ │ └── utils.py │ └── utils.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── bounding_box.py │ ├── boxlist_ops.py │ ├── image_list.py │ ├── keypoint.py │ └── segmentation_mask.py └── utils │ ├── README.md │ ├── __init__.py │ ├── c2_model_loading.py │ ├── checkpoint.py │ ├── collect_env.py │ ├── comm.py │ ├── cv2_util.py │ ├── cython_bbox.c │ ├── cython_bbox.pyx │ ├── env.py │ ├── imports.py │ ├── logger.py │ ├── metric_logger.py │ ├── miscellaneous.py │ ├── model_serialization.py │ ├── model_zoo.py │ ├── poly_nms │ ├── __init__.py │ ├── poly_nms.py │ ├── polyiou.cpp │ ├── polyiou.h │ ├── polyiou.i │ ├── polyiou.py │ ├── polyiou_wrap.cxx │ └── setup.py │ ├── registry.py │ └── timer.py ├── prepare.py ├── requirements.txt ├── setup.py ├── tools ├── cityscapes │ ├── convert_cityscapes_to_coco.py │ └── instances2dict_with_polygons.py ├── inference.py ├── test_net.py └── train_net.py ├── txt2json.py └── val_result.zip /.gitignore: -------------------------------------------------------------------------------- 1 | # compilation and distribution 2 | exp* 3 | __pycache__ 4 | _ext 5 | *.pyc 6 | *.so 7 | maskrcnn_benchmark.egg-info/ 8 | build/ 9 | dist/ 10 | 11 | tmp/ 12 | vis/ 13 | # pytorch/python/numpy formats 14 | *.pth 15 | *.pkl 16 | *.npy 17 | 18 | # ipython/jupyter notebooks 19 | *.ipynb 20 | **/.ipynb_checkpoints/ 21 | 22 | # Editor temporaries 23 | *.swn 24 | *.swo 25 | *.swp 26 | *.swpx 27 | *.swx 28 | *~ 29 | 30 | # Pycharm editor settings 31 | .idea 32 | 33 | # vscode editor settings 34 | .vscode 35 | 36 | # MacOS 37 | .DS_Store 38 | 39 | # project dirs 40 | /datasets 41 | /models 42 | /output 43 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "maskrcnn_benchmark/DOTA_devkit"] 2 | path = maskrcnn_benchmark/DOTA_devkit 3 | url = https://github.com/CAPTAIN-WHU/DOTA_devkit.git 4 | -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "original_root_dir": ".", 3 | "target_root_dir": ".", 4 | "sets": [ 5 | { 6 | "name": "test", 7 | "gap": 512, 8 | "only_img": true 9 | }, 10 | { 11 | "name": "trainval", 12 | "gap": 200, 13 | "only_img": false 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: 
"catalog://Caffe2Detectron/COCO/37129812/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x" 4 | BACKBONE: 5 | CONV_BODY: "R-152-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | TEST: ("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | TEST: ("coco_2014_minival",) 37 | DATALOADER: 38 | SIZE_DIVISIBILITY: 32 39 | -------------------------------------------------------------------------------- /configs/dcn/README.md: -------------------------------------------------------------------------------- 1 | ### Reference 2 | 1 [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/pdf/1811.11168.pdf) 3 | 2 third-party: [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn) 4 | 5 | ### Performance 6 | | case | bbox AP | mask AP | 7 | |----------------------------:|--------:|:-------:| 8 | | R-50-FPN-dcn (implement) | 39.8 | - | 9 | | R-50-FPN-dcn (mmdetection) | 40.0 | - | 10 | | R-50-FPN-mdcn (implement) | 40.0 | - | 11 | | R-50-FPN-mdcn (mmdetection) | 40.3 | - | 12 | | R-50-FPN-dcn (implement) | 40.8 | 36.8 | 13 | | R-50-FPN-dcn (mmdetection) | 41.1 | 37.2 | 14 | | R-50-FPN-dcn (implement) | 40.7 | 36.7 | 15 | | R-50-FPN-dcn (mmdetection) | 41.4 | 37.4 | 16 | 17 | 18 | ### Note 19 | see [dcn-v2](https://github.com/open-mmlab/mmdetection/blob/master/MODEL_ZOO.md#deformable-convolution-v2) in `mmdetection` for more details. 
20 | 21 | 22 | ### Usage 23 | add these three lines 24 | ``` 25 | MODEL: 26 | RESNETS: 27 | # corresponding to C2,C3,C4,C5 28 | STAGE_WITH_DCN: (False, True, True, True) 29 | WITH_MODULATED_DCN: True 30 | DEFORMABLE_GROUPS: 1 31 | ``` -------------------------------------------------------------------------------- /configs/dcn/e2e_faster_rcnn_dconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: False 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 8 gpus 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | TEST: 44 | IMS_PER_BATCH: 8 45 | -------------------------------------------------------------------------------- /configs/dcn/e2e_faster_rcnn_mdconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: True 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | DATASETS: 32 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 33 | TEST: ("coco_2014_minival",) 34 | DATALOADER: 35 | SIZE_DIVISIBILITY: 32 36 | SOLVER: 37 | # Assume 8 gpus 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | IMS_PER_BATCH: 16 43 | TEST: 44 | IMS_PER_BATCH: 8 45 | -------------------------------------------------------------------------------- /configs/dcn/e2e_mask_rcnn_dconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | 
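    # per-stage switch for (C2, C3, C4, C5): C2 stays a regular conv, C3-C5 use DCN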
STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: False 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | ROI_MASK_HEAD: 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 34 | PREDICTOR: "MaskRCNNC4Predictor" 35 | POOLER_RESOLUTION: 14 36 | POOLER_SAMPLING_RATIO: 2 37 | RESOLUTION: 28 38 | SHARE_BOX_FEATURE_EXTRACTOR: False 39 | MASK_ON: True 40 | DATASETS: 41 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 42 | TEST: ("coco_2014_minival",) 43 | DATALOADER: 44 | SIZE_DIVISIBILITY: 32 45 | SOLVER: 46 | # Assume 8 gpus 47 | BASE_LR: 0.02 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (60000, 80000) 50 | MAX_ITER: 90000 51 | IMS_PER_BATCH: 16 52 | TEST: 53 | IMS_PER_BATCH: 8 54 | 55 | -------------------------------------------------------------------------------- /configs/dcn/e2e_mask_rcnn_mdconv_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: 12 | BACKBONE_OUT_CHANNELS: 256 13 | STAGE_WITH_DCN: (False, True, True, True) 14 | WITH_MODULATED_DCN: True 15 | DEFORMABLE_GROUPS: 1 16 | RPN: 17 | USE_FPN: True 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | PRE_NMS_TOP_N_TRAIN: 2000 20 | PRE_NMS_TOP_N_TEST: 1000 21 | POST_NMS_TOP_N_TEST: 1000 22 | FPN_POST_NMS_TOP_N_TEST: 1000 23 | ROI_HEADS: 24 | USE_FPN: True 25 | ROI_BOX_HEAD: 26 | POOLER_RESOLUTION: 7 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | POOLER_SAMPLING_RATIO: 2 29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 30 | PREDICTOR: "FPNPredictor" 31 | ROI_MASK_HEAD: 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 34 | PREDICTOR: "MaskRCNNC4Predictor" 35 | POOLER_RESOLUTION: 14 36 | POOLER_SAMPLING_RATIO: 2 37 | RESOLUTION: 28 38 | SHARE_BOX_FEATURE_EXTRACTOR: False 39 | MASK_ON: True 40 | DATASETS: 41 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 42 | TEST: ("coco_2014_minival",) 43 | DATALOADER: 44 | SIZE_DIVISIBILITY: 32 45 | SOLVER: 46 | # Assume 8 gpus 47 | BASE_LR: 0.02 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (60000, 80000) 50 | MAX_ITER: 90000 51 | IMS_PER_BATCH: 16 52 | TEST: 53 | IMS_PER_BATCH: 8 54 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | 
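    # one scale per FPN level seen by the box pooler: 1/4, 1/8, 1/16, 1/32 of the input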
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 25 | TEST: ("coco_2014_minival",) 26 | DATALOADER: 27 | SIZE_DIVISIBILITY: 32 28 | SOLVER: 29 | BASE_LR: 0.02 30 | WEIGHT_DECAY: 0.0001 31 | STEPS: (60000, 80000) 32 | MAX_ITER: 90000 33 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 | POST_NMS_TOP_N_TEST: 1000 7 | DATASETS: 8 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 9 | TEST: ("coco_2014_minival",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WEIGHT_DECAY: 0.0001 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | IMS_PER_BATCH: 8 16 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | DATASETS: 24 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 25 | TEST: ("coco_2014_minival",) 26 | DATALOADER: 27 | SIZE_DIVISIBILITY: 32 28 | SOLVER: 29 | BASE_LR: 0.02 30 | WEIGHT_DECAY: 0.0001 31 | STEPS: (60000, 80000) 32 | MAX_ITER: 90000 33 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RPN: 7 | USE_FPN: True 8 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 9 | PRE_NMS_TOP_N_TRAIN: 2000 10 | PRE_NMS_TOP_N_TEST: 1000 11 | POST_NMS_TOP_N_TEST: 1000 12 | FPN_POST_NMS_TOP_N_TEST: 1000 13 | ROI_HEADS: 14 | USE_FPN: True 15 | ROI_BOX_HEAD: 16 | POOLER_RESOLUTION: 7 17 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 18 | POOLER_SAMPLING_RATIO: 2 19 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 20 | PREDICTOR: "FPNPredictor" 21 | RESNETS: 22 | BACKBONE_OUT_CHANNELS: 256 23 | STRIDE_IN_1X1: False 24 | NUM_GROUPS: 32 25 | WIDTH_PER_GROUP: 8 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | DATALOADER: 30 | SIZE_DIVISIBILITY: 32 31 | SOLVER: 32 | BASE_LR: 0.01 33 | WEIGHT_DECAY: 0.0001 34 | STEPS: (120000, 160000) 35 | MAX_ITER: 180000 36 | IMS_PER_BATCH: 8 37 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_fbnet.yaml: -------------------------------------------------------------------------------- 1 | 
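# FBNet mobile backbone; note there is no WEIGHT entry, so detection trains from scratch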
MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 100 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 512 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.06 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (60000, 80000) 34 | MAX_ITER: 90000 35 | IMS_PER_BATCH: 128 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (320, ) 40 | MAX_SIZE_TRAIN: 640 41 | MIN_SIZE_TEST: 320 42 | MAX_SIZE_TEST: 640 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_fbnet_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 200 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 256 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.06 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (60000, 80000) 34 | MAX_ITER: 90000 35 | IMS_PER_BATCH: 128 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (600, ) 40 | MAX_SIZE_TRAIN: 1000 41 | MIN_SIZE_TEST: 600 42 | MAX_SIZE_TEST: 1000 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "cham_v1a" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | RPN: 12 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 13 | ANCHOR_STRIDE: (16, ) 14 | BATCH_SIZE_PER_IMAGE: 256 15 | PRE_NMS_TOP_N_TRAIN: 6000 16 | PRE_NMS_TOP_N_TEST: 6000 17 | POST_NMS_TOP_N_TRAIN: 2000 18 | POST_NMS_TOP_N_TEST: 200 19 | RPN_HEAD: FBNet.rpn_head 20 | ROI_HEADS: 21 | BATCH_SIZE_PER_IMAGE: 128 22 | ROI_BOX_HEAD: 23 | POOLER_RESOLUTION: 6 24 | FEATURE_EXTRACTOR: FBNet.roi_head 25 | NUM_CLASSES: 81 26 | DATASETS: 27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 28 | TEST: ("coco_2014_minival",) 29 | SOLVER: 30 | BASE_LR: 0.045 31 | WARMUP_FACTOR: 0.1 32 | WEIGHT_DECAY: 0.0001 33 | STEPS: (90000, 120000) 34 | 
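  # ~1.5x schedule: the LR steps above fall at 2/3 and 8/9 of training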
MAX_ITER: 135000 35 | IMS_PER_BATCH: 96 # for 8GPUs 36 | # TEST: 37 | # IMS_PER_BATCH: 8 38 | INPUT: 39 | MIN_SIZE_TRAIN: (600, ) 40 | MAX_SIZE_TRAIN: 1000 41 | MIN_SIZE_TEST: 600 42 | MAX_SIZE_TEST: 1000 43 | PIXEL_MEAN: [103.53, 116.28, 123.675] 44 | PIXEL_STD: [57.375, 57.12, 58.395] 45 | -------------------------------------------------------------------------------- /configs/e2e_keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | NUM_CLASSES: 2 24 | ROI_KEYPOINT_HEAD: 25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 26 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor" 27 | PREDICTOR: "KeypointRCNNPredictor" 28 | POOLER_RESOLUTION: 14 29 | POOLER_SAMPLING_RATIO: 2 30 | RESOLUTION: 56 31 | SHARE_BOX_FEATURE_EXTRACTOR: False 32 | KEYPOINT_ON: True 33 | DATASETS: 34 | TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",) 35 | TEST: ("keypoints_coco_2014_minival",) 36 | INPUT: 37 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.02 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (60000, 80000) 44 | MAX_ITER: 90000 45 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 34 | TEST: ("coco_2014_minival",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | RPN: 5 | PRE_NMS_TOP_N_TEST: 6000 6 
| POST_NMS_TOP_N_TEST: 1000 7 | ROI_MASK_HEAD: 8 | PREDICTOR: "MaskRCNNC4Predictor" 9 | SHARE_BOX_FEATURE_EXTRACTOR: True 10 | MASK_ON: True 11 | DATASETS: 12 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 13 | TEST: ("coco_2014_minival",) 14 | SOLVER: 15 | BASE_LR: 0.01 16 | WEIGHT_DECAY: 0.0001 17 | STEPS: (120000, 160000) 18 | MAX_ITER: 180000 19 | IMS_PER_BATCH: 8 20 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | ROI_BOX_HEAD: 18 | POOLER_RESOLUTION: 7 19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 20 | POOLER_SAMPLING_RATIO: 2 21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 22 | PREDICTOR: "FPNPredictor" 23 | ROI_MASK_HEAD: 24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 26 | PREDICTOR: "MaskRCNNC4Predictor" 27 | POOLER_RESOLUTION: 14 28 | POOLER_SAMPLING_RATIO: 2 29 | RESOLUTION: 28 30 | SHARE_BOX_FEATURE_EXTRACTOR: False 31 | MASK_ON: True 32 | DATASETS: 33 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 34 | TEST: ("coco_2014_minival",) 35 | DATALOADER: 36 | SIZE_DIVISIBILITY: 32 37 | SOLVER: 38 | BASE_LR: 0.02 39 | WEIGHT_DECAY: 0.0001 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | STRIDE_IN_1X1: False 9 | NUM_GROUPS: 32 10 | WIDTH_PER_GROUP: 8 11 | RPN: 12 | USE_FPN: True 13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 14 | PRE_NMS_TOP_N_TRAIN: 2000 15 | PRE_NMS_TOP_N_TEST: 1000 16 | POST_NMS_TOP_N_TEST: 1000 17 | FPN_POST_NMS_TOP_N_TEST: 1000 18 | ROI_HEADS: 19 | USE_FPN: True 20 | ROI_BOX_HEAD: 21 | POOLER_RESOLUTION: 7 22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 23 | POOLER_SAMPLING_RATIO: 2 24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 25 | PREDICTOR: "FPNPredictor" 26 | ROI_MASK_HEAD: 27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 29 | PREDICTOR: "MaskRCNNC4Predictor" 30 | POOLER_RESOLUTION: 14 31 | POOLER_SAMPLING_RATIO: 2 32 | RESOLUTION: 28 33 | SHARE_BOX_FEATURE_EXTRACTOR: False 34 | MASK_ON: True 35 | DATASETS: 36 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 37 | TEST: ("coco_2014_minival",) 38 | DATALOADER: 39 | SIZE_DIVISIBILITY: 32 40 | SOLVER: 41 | BASE_LR: 0.01 42 | WEIGHT_DECAY: 0.0001 43 | STEPS: (120000, 160000) 44 | MAX_ITER: 180000 45 | IMS_PER_BATCH: 8 46 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | 
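    # same FBNet trunk as the detection-only config; the mask branch is added under ROI_MASK_HEAD below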
CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 100 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (320, ) 48 | MAX_SIZE_TRAIN: 640 49 | MIN_SIZE_TEST: 320 50 | MAX_SIZE_TEST: 640 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "default" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 200 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (600, ) 48 | MAX_SIZE_TRAIN: 1000 49 | MIN_SIZE_TEST: 600 50 | MAX_SIZE_TEST: 1000 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "xirb16d_dsmask" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: -1.0 12 | RPN: 13 | ANCHOR_SIZES: (16, 32, 64, 128, 256) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | 
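    # single-level RPN (one ANCHOR_STRIDE entry), hence the large pre-NMS proposal budgets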
POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 100 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 512 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (320, ) 48 | MAX_SIZE_TRAIN: 640 49 | MIN_SIZE_TEST: 320 50 | MAX_SIZE_TEST: 640 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | CONV_BODY: FBNet 5 | FBNET: 6 | ARCH: "xirb16d_dsmask" 7 | BN_TYPE: "bn" 8 | WIDTH_DIVISOR: 8 9 | DW_CONV_SKIP_BN: True 10 | DW_CONV_SKIP_RELU: True 11 | DET_HEAD_LAST_SCALE: 0.0 12 | RPN: 13 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 14 | ANCHOR_STRIDE: (16, ) 15 | BATCH_SIZE_PER_IMAGE: 256 16 | PRE_NMS_TOP_N_TRAIN: 6000 17 | PRE_NMS_TOP_N_TEST: 6000 18 | POST_NMS_TOP_N_TRAIN: 2000 19 | POST_NMS_TOP_N_TEST: 200 20 | RPN_HEAD: FBNet.rpn_head 21 | ROI_HEADS: 22 | BATCH_SIZE_PER_IMAGE: 256 23 | ROI_BOX_HEAD: 24 | POOLER_RESOLUTION: 6 25 | FEATURE_EXTRACTOR: FBNet.roi_head 26 | NUM_CLASSES: 81 27 | ROI_MASK_HEAD: 28 | POOLER_RESOLUTION: 6 29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask 30 | PREDICTOR: "MaskRCNNConv1x1Predictor" 31 | RESOLUTION: 12 32 | SHARE_BOX_FEATURE_EXTRACTOR: False 33 | MASK_ON: True 34 | DATASETS: 35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 36 | TEST: ("coco_2014_minival",) 37 | SOLVER: 38 | BASE_LR: 0.06 39 | WARMUP_FACTOR: 0.1 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | IMS_PER_BATCH: 128 # for 8GPUs 44 | # TEST: 45 | # IMS_PER_BATCH: 8 46 | INPUT: 47 | MIN_SIZE_TRAIN: (600, ) 48 | MAX_SIZE_TRAIN: 1000 49 | MIN_SIZE_TEST: 600 50 | MAX_SIZE_TEST: 1000 51 | PIXEL_MEAN: [103.53, 116.28, 123.675] 52 | PIXEL_STD: [57.375, 57.12, 58.395] 53 | -------------------------------------------------------------------------------- /configs/glide/common.yaml: -------------------------------------------------------------------------------- 1 | OUTPUT_DIR: "exp_common" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (1024,) 4 | MAX_SIZE_TRAIN: 1333 5 | MIN_SIZE_TEST: 1024 6 | MAX_SIZE_TEST: 1333 7 | MODEL: 8 | META_ARCHITECTURE: "GeneralizedRCNN" 9 | RATIO_ON: True 10 | WEIGHT: "data/R-101.pkl" 11 | BACKBONE: 12 | CONV_BODY: "R-101-FPN" 13 | RESNETS: 14 | BACKBONE_OUT_CHANNELS: 256 15 | RPN: 16 | USE_FPN: True 17 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 18 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 19 | ASPECT_RATIOS: (0.5, 1.0, 2.0) 20 | PRE_NMS_TOP_N_TRAIN: 1200 21 | PRE_NMS_TOP_N_TEST: 1200 22 | FPN_POST_NMS_TOP_N_TRAIN: 1200 23 | FPN_POST_NMS_TOP_N_TEST: 600 24 | ROI_HEADS: 25 | USE_FPN: True 26 | BATCH_SIZE_PER_IMAGE: 512 27 | POSITIVE_FRACTION: 0.25 28 | DETECTIONS_PER_IMG: 2000 29 | ROI_BOX_HEAD: 30 | POOLER_RESOLUTION: 7 31 | 
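    # standard FPN box-head pooling; the GLIDE-specific part is the FPNRatioPredictor below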
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 32 | POOLER_SAMPLING_RATIO: 2 33 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 34 | PREDICTOR: "FPNRatioPredictor" 35 | NUM_CLASSES: 16 36 | DATASETS: 37 | TRAIN: ("Your own dataset",) 38 | TEST: ("Your own dataset",) 39 | DATALOADER: 40 | SIZE_DIVISIBILITY: 32 41 | SOLVER: 42 | BASE_LR: 0.0075 43 | WEIGHT_DECAY: 0.0001 44 | STEPS: (38000, 46000) 45 | MAX_ITER: 50000 46 | IMS_PER_BATCH: 6 47 | CHECKPOINT_START_STEP: 30000 48 | TEST: 49 | IMS_PER_BATCH: 12 50 | -------------------------------------------------------------------------------- /configs/glide/dota.yaml: -------------------------------------------------------------------------------- 1 | OUTPUT_DIR: "exp_dota/dota" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (1024,) 4 | MAX_SIZE_TRAIN: 1333 5 | MIN_SIZE_TEST: 1024 6 | MAX_SIZE_TEST: 1333 7 | RANDOM_ROTATE_ON: True # (0, 90, 180, or 270) 8 | BRIGHTNESS: 0. 9 | CONTRAST: 0. 10 | SATURATION: 0. 11 | HUE: 0. 12 | MODEL: 13 | META_ARCHITECTURE: "GeneralizedRCNN" 14 | RATIO_ON: True 15 | WEIGHT: "data/R-101.pkl" 16 | BACKBONE: 17 | CONV_BODY: "R-101-FPN" 18 | RESNETS: 19 | BACKBONE_OUT_CHANNELS: 256 20 | RPN: 21 | USE_FPN: True 22 | ANCHOR_SIZES: (32, 64, 128, 256, 512) 23 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 24 | ASPECT_RATIOS: (0.5, 1.0, 2.0) 25 | PRE_NMS_TOP_N_TRAIN: 1200 26 | PRE_NMS_TOP_N_TEST: 1200 27 | FPN_POST_NMS_TOP_N_TRAIN: 1200 28 | FPN_POST_NMS_TOP_N_TEST: 600 29 | ROI_HEADS: 30 | USE_FPN: True 31 | BATCH_SIZE_PER_IMAGE: 512 32 | POSITIVE_FRACTION: 0.25 33 | DETECTIONS_PER_IMG: 2000 34 | ROI_BOX_HEAD: 35 | POOLER_RESOLUTION: 7 36 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 37 | POOLER_SAMPLING_RATIO: 2 38 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 39 | PREDICTOR: "FPNRatioPredictor" 40 | NUM_CLASSES: 16 41 | DATASETS: 42 | TRAIN: ("dota_trainval_cut",) 43 | TEST: ("dota_test_cut",) 44 | DATALOADER: 45 | SIZE_DIVISIBILITY: 32 46 | SOLVER: 47 | BASE_LR: 0.0075 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (38000, 46000) 50 | MAX_ITER: 50000 51 | IMS_PER_BATCH: 6 52 | CHECKPOINT_START_STEP: 30000 53 | TEST: 54 | IMS_PER_BATCH: 12 55 | -------------------------------------------------------------------------------- /configs/gn_baselines/README.md: -------------------------------------------------------------------------------- 1 | ### Group Normalization 2 | 1 [Group Normalization](https://arxiv.org/abs/1803.08494) 3 | 2 [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883) 4 | 3 [official code](https://github.com/facebookresearch/Detectron/blob/master/projects/GN/README.md) 5 | 6 | 7 | ### Performance 8 | | case | Type | lr schd | im/gpu | bbox AP | mask AP | 9 | |----------------------------|:------------:|:---------:|:-------:|:-------:|:-------:| 10 | | R-50-FPN, GN (paper) | finetune | 2x | 2 | 40.3 | 35.7 | 11 | | R-50-FPN, GN (implement) | finetune | 2x | 2 | 40.2 | 36.0 | 12 | | R-50-FPN, GN (paper) | from scratch | 3x | 2 | 39.5 | 35.2 | 13 | | R-50-FPN, GN (implement) | from scratch | 3x | 2 | 38.9 | 35.1 | 14 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 
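    # GN itself is wired in via the TRANS_FUNC/STEM_FUNC overrides below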
12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | DATASETS: 37 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 38 | TEST: ("coco_2014_minival",) 39 | DATALOADER: 40 | SIZE_DIVISIBILITY: 32 41 | SOLVER: 42 | # Assume 8 gpus 43 | BASE_LR: 0.02 44 | WEIGHT_DECAY: 0.0001 45 | STEPS: (60000, 80000) 46 | MAX_ITER: 90000 47 | IMS_PER_BATCH: 16 48 | TEST: 49 | IMS_PER_BATCH: 8 50 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | DATASETS: 39 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 40 | TEST: ("coco_2014_minival",) 41 | DATALOADER: 42 | SIZE_DIVISIBILITY: 32 43 | SOLVER: 44 | # Assume 8 gpus 45 | BASE_LR: 0.02 46 | WEIGHT_DECAY: 0.0001 47 | STEPS: (60000, 80000) 48 | MAX_ITER: 90000 49 | IMS_PER_BATCH: 16 50 | TEST: 51 | IMS_PER_BATCH: 8 52 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 
| ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 35 | PREDICTOR: "FPNPredictor" 36 | ROI_MASK_HEAD: 37 | USE_GN: True # use GN for mask head 38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 39 | CONV_LAYERS: (256, 256, 256, 256) 40 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 41 | PREDICTOR: "MaskRCNNC4Predictor" 42 | POOLER_RESOLUTION: 14 43 | POOLER_SAMPLING_RATIO: 2 44 | RESOLUTION: 28 45 | SHARE_BOX_FEATURE_EXTRACTOR: False 46 | MASK_ON: True 47 | DATASETS: 48 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 49 | TEST: ("coco_2014_minival",) 50 | DATALOADER: 51 | SIZE_DIVISIBILITY: 32 52 | SOLVER: 53 | # Assume 8 gpus 54 | BASE_LR: 0.02 55 | WEIGHT_DECAY: 0.0001 56 | STEPS: (60000, 80000) 57 | MAX_ITER: 90000 58 | IMS_PER_BATCH: 16 59 | TEST: 60 | IMS_PER_BATCH: 8 61 | -------------------------------------------------------------------------------- /configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN" 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | RESNETS: # use GN for backbone 12 | BACKBONE_OUT_CHANNELS: 256 13 | STRIDE_IN_1X1: False 14 | TRANS_FUNC: "BottleneckWithGN" 15 | STEM_FUNC: "StemWithGN" 16 | FPN: 17 | USE_GN: True # use GN for FPN 18 | RPN: 19 | USE_FPN: True 20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 21 | PRE_NMS_TOP_N_TRAIN: 2000 22 | PRE_NMS_TOP_N_TEST: 1000 23 | POST_NMS_TOP_N_TEST: 1000 24 | FPN_POST_NMS_TOP_N_TEST: 1000 25 | ROI_HEADS: 26 | USE_FPN: True 27 | BATCH_SIZE_PER_IMAGE: 512 28 | POSITIVE_FRACTION: 0.25 29 | ROI_BOX_HEAD: 30 | USE_GN: True # use GN for bbox head 31 | POOLER_RESOLUTION: 7 32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 33 | POOLER_SAMPLING_RATIO: 2 34 | CONV_HEAD_DIM: 256 35 | NUM_STACKED_CONVS: 4 36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor" 37 | PREDICTOR: "FPNPredictor" 38 | ROI_MASK_HEAD: 39 | USE_GN: True # use GN for mask head 40 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 41 | CONV_LAYERS: (256, 256, 256, 256) 42 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor" 43 | PREDICTOR: "MaskRCNNC4Predictor" 44 | POOLER_RESOLUTION: 14 45 | POOLER_SAMPLING_RATIO: 2 46 | RESOLUTION: 28 47 | SHARE_BOX_FEATURE_EXTRACTOR: False 48 | MASK_ON: True 49 | DATASETS: 50 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 51 | TEST: ("coco_2014_minival",) 52 | DATALOADER: 53 | SIZE_DIVISIBILITY: 32 54 | SOLVER: 55 | # Assume 8 gpus 56 | BASE_LR: 0.02 57 | WEIGHT_DECAY: 0.0001 58 | STEPS: (60000, 80000) 59 | MAX_ITER: 90000 60 | IMS_PER_BATCH: 16 61 | TEST: 62 | IMS_PER_BATCH: 8 63 | -------------------------------------------------------------------------------- /configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | INPUT: 2 | MIN_SIZE_TRAIN: (800,) 3 | MAX_SIZE_TRAIN: 1333 4 | MIN_SIZE_TEST: 800 5 | MAX_SIZE_TEST: 1333 6 | MODEL: 7 | META_ARCHITECTURE: "GeneralizedRCNN" 8 | WEIGHT: "" # no pretrained model 9 | BACKBONE: 10 | CONV_BODY: "R-50-FPN" 11 | 
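    # the default freezes the stem and first stage; 0 trains everything, as required when starting from scratch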
FREEZE_CONV_BODY_AT: 0 # finetune all layers 12 | RESNETS: # use GN for backbone 13 | BACKBONE_OUT_CHANNELS: 256 14 | STRIDE_IN_1X1: False 15 | TRANS_FUNC: "BottleneckWithGN" 16 | STEM_FUNC: "StemWithGN" 17 | FPN: 18 | USE_GN: True # use GN for FPN 19 | RPN: 20 | USE_FPN: True 21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 22 | PRE_NMS_TOP_N_TRAIN: 2000 23 | PRE_NMS_TOP_N_TEST: 1000 24 | POST_NMS_TOP_N_TEST: 1000 25 | FPN_POST_NMS_TOP_N_TEST: 1000 26 | ROI_HEADS: 27 | USE_FPN: True 28 | BATCH_SIZE_PER_IMAGE: 512 29 | POSITIVE_FRACTION: 0.25 30 | ROI_BOX_HEAD: 31 | USE_GN: True # use GN for bbox head 32 | POOLER_RESOLUTION: 7 33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 34 | POOLER_SAMPLING_RATIO: 2 35 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 36 | PREDICTOR: "FPNPredictor" 37 | DATASETS: 38 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 39 | TEST: ("coco_2014_minival",) 40 | DATALOADER: 41 | SIZE_DIVISIBILITY: 32 42 | SOLVER: 43 | # Assume 8 gpus 44 | BASE_LR: 0.02 45 | WEIGHT_DECAY: 0.0001 46 | STEPS: (210000, 250000) 47 | MAX_ITER: 270000 48 | IMS_PER_BATCH: 16 49 | TEST: 50 | IMS_PER_BATCH: 8 51 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CUDA="9.0" 2 | ARG CUDNN="7" 3 | 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04 5 | 6 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 7 | 8 | # install basics 9 | RUN apt-get update -y \ 10 | && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++ \ 11 | && apt-get install -y libglib2.0-0 libsm6 libxext6 libxrender-dev 12 | 13 | # Install Miniconda 14 | RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 15 | && chmod +x /miniconda.sh \ 16 | && /miniconda.sh -b -p /miniconda \ 17 | && rm /miniconda.sh 18 | 19 | ENV PATH=/miniconda/bin:$PATH 20 | 21 | # Create a Python 3.6 environment 22 | RUN /miniconda/bin/conda install -y conda-build \ 23 | && /miniconda/bin/conda create -y --name py36 python=3.6.7 \ 24 | && /miniconda/bin/conda clean -ya 25 | 26 | ENV CONDA_DEFAULT_ENV=py36 27 | ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV 28 | ENV PATH=$CONDA_PREFIX/bin:$PATH 29 | ENV CONDA_AUTO_UPDATE_CONDA=false 30 | 31 | RUN conda install -y ipython 32 | RUN pip install requests ninja yacs cython matplotlib opencv-python tqdm 33 | 34 | # NOTE Mingtao 35 | RUN conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ 36 | RUN conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ 37 | RUN conda config --set show_channel_urls yes 38 | 39 | # Install PyTorch 1.0 Nightly 40 | ARG CUDA 41 | RUN conda install pytorch-nightly cudatoolkit=${CUDA} -c pytorch \ 42 | && conda clean -ya 43 | 44 | # Install TorchVision master 45 | RUN git clone https://github.com/pytorch/vision.git \ 46 | && cd vision \ 47 | && python setup.py install 48 | 49 | # install pycocotools 50 | RUN git clone https://github.com/cocodataset/cocoapi.git \ 51 | && cd cocoapi/PythonAPI \ 52 | && python setup.py build_ext install 53 | 54 | # install apex 55 | RUN git clone https://github.com/NVIDIA/apex.git \ 56 | && cd apex \ 57 | && python setup.py install --cuda_ext --cpp_ext 58 | 59 | # install PyTorch Detection 60 | ARG FORCE_CUDA="1" 61 | ENV FORCE_CUDA=${FORCE_CUDA} 62 | 63 | # RUN git clone 
https://github.com/facebookresearch/maskrcnn-benchmark.git \ 64 | # && cd maskrcnn-benchmark \ 65 | # && python setup.py build develop 66 | 67 | RUN mkdir /dota-ratio 68 | WORKDIR /dota-ratio 69 | -------------------------------------------------------------------------------- /docker/docker-jupyter/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CUDA="9.0" 2 | ARG CUDNN="7" 3 | 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04 5 | 6 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections 7 | 8 | # install basics 9 | RUN apt-get update -y \ 10 | && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++ 11 | 12 | # Install Miniconda 13 | RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 14 | && chmod +x /miniconda.sh \ 15 | && /miniconda.sh -b -p /miniconda \ 16 | && rm /miniconda.sh 17 | 18 | ENV PATH=/miniconda/bin:$PATH 19 | 20 | # Create a Python 3.6 environment 21 | RUN /miniconda/bin/conda install -y conda-build \ 22 | && /miniconda/bin/conda create -y --name py36 python=3.6.7 \ 23 | && /miniconda/bin/conda clean -ya 24 | 25 | ENV CONDA_DEFAULT_ENV=py36 26 | ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV 27 | ENV PATH=$CONDA_PREFIX/bin:$PATH 28 | ENV CONDA_AUTO_UPDATE_CONDA=false 29 | 30 | RUN conda install -y ipython 31 | RUN pip install requests ninja yacs cython matplotlib jupyter tqdm 32 | 33 | # Install PyTorch Nightly 34 | ARG CUDA 35 | RUN conda install -y pytorch-nightly cudatoolkit=${CUDA} -c pytorch 36 | 37 | # Install OpenCV 38 | RUN conda install -y opencv -c menpo \ 39 | && conda clean -ya 40 | 41 | WORKDIR /root 42 | 43 | USER root 44 | 45 | RUN mkdir /notebooks 46 | 47 | WORKDIR /notebooks 48 | 49 | # Install TorchVision master 50 | RUN git clone https://github.com/pytorch/vision.git \ 51 | && cd vision \ 52 | && python setup.py install 53 | 54 | # install pycocotools 55 | RUN git clone https://github.com/cocodataset/cocoapi.git \ 56 | && cd cocoapi/PythonAPI \ 57 | && python setup.py build_ext install 58 | 59 | # install apex 60 | RUN git clone https://github.com/NVIDIA/apex.git \ 61 | && cd apex \ 62 | && python setup.py install --cuda_ext --cpp_ext 63 | 64 | # install PyTorch Detection 65 | ARG FORCE_CUDA="1" 66 | ENV FORCE_CUDA=${FORCE_CUDA} 67 | RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \ 68 | && cd maskrcnn-benchmark \ 69 | && python setup.py build develop 70 | 71 | RUN jupyter notebook --generate-config 72 | 73 | ENV CONFIG_PATH="/root/.jupyter/jupyter_notebook_config.py" 74 | 75 | COPY "jupyter_notebook_config.py" ${CONFIG_PATH} 76 | 77 | ENTRYPOINT ["sh", "-c", "jupyter notebook --allow-root -y --no-browser --ip=0.0.0.0 --config=${CONFIG_PATH}"] 78 | -------------------------------------------------------------------------------- /docker/docker-jupyter/jupyter_notebook_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from IPython.lib import passwd 3 | 4 | # c = c # pylint:disable=undefined-variable 5 | c = get_config() 6 | c.NotebookApp.ip = '0.0.0.0' 7 | c.NotebookApp.port = int(os.getenv('PORT', 8888)) 8 | c.NotebookApp.open_browser = False 9 | 10 | # sets a password if PASSWORD is set in the environment 11 | if 'PASSWORD' in os.environ: 12 | password = os.environ['PASSWORD'] 13 | if password: 14 | c.NotebookApp.password = passwd(password) 15 | else: 16 | c.NotebookApp.password = '' 17 | 
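        # an empty PASSWORD disables auth entirely: no password and no token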
c.NotebookApp.token = '' 18 | del os.environ['PASSWORD'] 19 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .defaults import _C as cfg 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/PSROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor PSROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio, 17 | const int out_dim) { 18 | if (input.type().is_cuda()) { 19 | #ifdef WITH_CUDA 20 | return PSROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, out_dim); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | AT_ERROR("Not compiled with CPU support"); 26 | //return PSROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 27 | } 28 | 29 | at::Tensor PSROIAlign_backward(const at::Tensor& grad, 30 | const at::Tensor& rois, 31 | const float spatial_scale, 32 | const int pooled_height, 33 | const int pooled_width, 34 | const int batch_size, 35 | const int channels, 36 | const int height, 37 | const int width, 38 | const int sampling_ratio, 39 | const int out_dim) { 40 | if (grad.type().is_cuda()) { 41 | #ifdef WITH_CUDA 42 | return PSROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio, out_dim); 43 | #else 44 | AT_ERROR("Not compiled with GPU support"); 45 | #endif 46 | } 47 | AT_ERROR("Not implemented on the CPU"); 48 | } 49 | 50 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
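// Dispatch header: route to the CUDA kernel when the input tensor lives on the
// GPU (and the extension was built WITH_CUDA); otherwise fall back to the CPU
// implementation. Note that only the forward pass has a CPU path here.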
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return
SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template <typename scalar_t> 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data<uint8_t>(); 30 | auto order = order_t.data<int64_t>(); 31 | auto x1 = x1_t.data<scalar_t>(); 32 | auto y1 = y1_t.data<scalar_t>(); 33 | auto x2 = x2_t.data<scalar_t>(); 34 | auto y2 = y2_t.data<scalar_t>(); 35 | auto areas = areas_t.data<scalar_t>(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel<scalar_t>(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/vision.h:
-------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | 18 | at::Tensor coveredby_cpu(const at::Tensor& dets, 19 | const float alpha, 20 | const float beta); 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cuda/deform_pool_cuda.cu: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | void DeformablePSROIPoolForward( 20 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 21 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 22 | const int height, const int width, const int num_bbox, 23 | const int channels_trans, const int no_trans, const float spatial_scale, 24 | const int output_dim, const int group_size, const int pooled_size, 25 | const int part_size, const int sample_per_part, const float trans_std); 26 | 27 | void DeformablePSROIPoolBackwardAcc( 28 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 29 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 30 | at::Tensor trans_grad, const int batch, const int channels, 31 | const int height, const int width, const int num_bbox, 32 | const int channels_trans, const int no_trans, const float spatial_scale, 33 | const int output_dim, const int group_size, const int pooled_size, 34 | const int part_size, const int sample_per_part, const float trans_std); 35 | 36 | void deform_psroi_pooling_cuda_forward( 37 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 38 | at::Tensor top_count, const int no_trans, const float spatial_scale, 39 | const int output_dim, const int group_size, const int pooled_size, 40 | const int part_size, const int sample_per_part, const float trans_std) 41 | { 42 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 43 | 44 | const int batch = input.size(0); 45 | const int channels = input.size(1); 46 | const int height = input.size(2); 47 | const int width = input.size(3); 48 | const int channels_trans = no_trans ? 
2 : trans.size(1); 49 | 50 | const int num_bbox = bbox.size(0); 51 | if (num_bbox != out.size(0)) 52 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 53 | out.size(0), num_bbox); 54 | 55 | DeformablePSROIPoolForward( 56 | input, bbox, trans, out, top_count, batch, channels, height, width, 57 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 58 | pooled_size, part_size, sample_per_part, trans_std); 59 | } 60 | 61 | void deform_psroi_pooling_cuda_backward( 62 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 63 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 64 | const int no_trans, const float spatial_scale, const int output_dim, 65 | const int group_size, const int pooled_size, const int part_size, 66 | const int sample_per_part, const float trans_std) 67 | { 68 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 69 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 70 | 71 | const int batch = input.size(0); 72 | const int channels = input.size(1); 73 | const int height = input.size(2); 74 | const int width = input.size(3); 75 | const int channels_trans = no_trans ? 2 : trans.size(1); 76 | 77 | const int num_bbox = bbox.size(0); 78 | if (num_bbox != out_grad.size(0)) 79 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 80 | out_grad.size(0), num_bbox); 81 | 82 | DeformablePSROIPoolBackwardAcc( 83 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 84 | channels, height, width, num_bbox, channels_trans, no_trans, 85 | spatial_scale, output_dim, group_size, pooled_size, part_size, 86 | sample_per_part, trans_std); 87 | } 88 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
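The two CUDA wrappers above mostly do shape bookkeeping before launching the kernels: `channels_trans` falls back to 2 when `no_trans` is set, and the number of boxes must match the first dimension of the output. A sketch of the tensor shapes those checks imply, with made-up sizes (not a runnable kernel launch):

```python
import torch

num_bbox, out_channels, pooled = 4, 256, 7

data = torch.randn(1, 256, 64, 64)                # (batch, channels, height, width)
bbox = torch.randn(num_bbox, 5)                   # rois, one row per box
trans = torch.zeros(num_bbox, 2, pooled, pooled)  # offsets; channels_trans == trans.size(1)
out = torch.empty(num_bbox, out_channels, pooled, pooled)  # out.size(0) must equal num_bbox
top_count = torch.empty_like(out)                 # per-bin sample counts used by the backward pass
```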
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | const int sample_per_part, 56 | const float trans_std) 57 | { 58 | if (input.type().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
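For readers who do not want to chase the dispatch into `nms_cpu_kernel` shown earlier, here is the same greedy suppression loop rendered in plain PyTorch. It is an illustrative reference, not what the repo calls at runtime; note the `+ 1` in the area and overlap arithmetic, which matches the C++ kernel's integer-style box convention:

```python
import torch

def nms_reference(dets, scores, threshold):
    """Greedy NMS mirroring nms_cpu_kernel; returns kept indices, best score first."""
    x1, y1, x2, y2 = dets.unbind(dim=1)
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)          # +1 matches the kernel's convention
    order = scores.argsort(descending=True).tolist()
    suppressed = [False] * dets.size(0)
    keep = []
    for pos, i in enumerate(order):
        if suppressed[i]:
            continue
        keep.append(i)
        for j in order[pos + 1:]:
            if suppressed[j]:
                continue
            w = max(0.0, min(float(x2[i]), float(x2[j])) - max(float(x1[i]), float(x1[j])) + 1)
            h = max(0.0, min(float(y2[i]), float(y2[j])) - max(float(y1[i]), float(y1[j])) + 1)
            inter = w * h
            iou = inter / (float(areas[i]) + float(areas[j]) - inter)
            if iou >= threshold:
                suppressed[j] = True
    return torch.tensor(keep, dtype=torch.long)

boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [50., 50., 60., 60.]])
print(nms_reference(boxes, torch.tensor([0.9, 0.8, 0.7]), 0.5))  # tensor([0, 2])
```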
2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "PSROIAlign.h" 5 | #include "ROIPool.h" 6 | #include "SigmoidFocalLoss.h" 7 | #include "deform_conv.h" 8 | #include "deform_pool.h" 9 | 10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 11 | m.def("nms", &nms, "non-maximum suppression"); 12 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 13 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 14 | m.def("psroi_align_forward", &PSROIAlign_forward, "PSROIAlign_forward"); 15 | m.def("psroi_align_backward", &PSROIAlign_backward, "PSROIAlign_backward"); 16 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 17 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 18 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 19 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 20 | // dcn-v2 21 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 22 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 23 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 24 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 25 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 26 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 27 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 28 | } 29 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/README.md: -------------------------------------------------------------------------------- 1 | # Setting Up Datasets 2 | This file describes how to perform training on other datasets. 3 | 4 | Currently, only the Pascal VOC dataset can be loaded from its original format and output Pascal-style results. 5 | 6 | We expect annotations from other datasets to be converted to COCO json format, and 7 | the output will be COCO-style (i.e. AP, AP50, AP75, APs, APm, APl for bbox and segm). 8 | 9 | ## Creating Symlinks for PASCAL VOC 10 | 11 | We assume that your symlinked `datasets/voc/VOC<year>` directory has the following structure: 12 | 13 | ``` 14 | VOC<year> 15 | |_ JPEGImages 16 | |  |_ <im-1>.jpg 17 | |  |_ ... 18 | |  |_ <im-N>.jpg 19 | |_ Annotations 20 | |  |_ pascal_train<year>.json (optional) 21 | |  |_ pascal_val<year>.json (optional) 22 | |  |_ pascal_test<year>.json (optional) 23 | |  |_ <ann-1>.xml 24 | |  |_ ... 25 | |  |_ <ann-N>.xml 26 | |_ VOCdevkit<year> 27 | ``` 28 | 29 | Create symlinks for `voc/VOC<year>`: 30 | 31 | ``` 32 | cd ~/github/maskrcnn-benchmark 33 | mkdir -p datasets/voc/VOC<year> 34 | ln -s /path/to/VOC<year> datasets/voc/VOC<year> 35 | ``` 36 | Example configuration files for PASCAL VOC can be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/pascal_voc/). 37 | 38 | ### PASCAL VOC Annotations in COCO Format 39 | To output COCO-style evaluation results, PASCAL VOC annotations in COCO json format are required and can be downloaded from [here](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip) 40 | via http://cocodataset.org/#external.
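For reference, the COCO json format required above boils down to three top-level lists; the values below are illustrative only, not taken from the real PASCAL_VOC.zip:

```python
coco_style = {
    "images": [
        {"id": 1, "file_name": "2007_000027.jpg", "width": 486, "height": 500},
    ],
    "annotations": [
        {"id": 1, "image_id": 1, "category_id": 15,
         "bbox": [174, 101, 175, 238],   # [x, y, width, height]
         "area": 41650, "iscrowd": 0},
    ],
    "categories": [
        {"id": 15, "name": "person"},
    ],
}
```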
41 | 42 | ## Creating Symlinks for Cityscapes 43 | 44 | We assume that your symlinked `datasets/cityscapes` directory has the following structure: 45 | 46 | ``` 47 | cityscapes 48 | |_ images 49 | |  |_ <im-1>.jpg 50 | |  |_ ... 51 | |  |_ <im-N>.jpg 52 | |_ annotations 53 | |  |_ instanceonly_gtFile_train.json 54 | |  |_ ... 55 | |_ raw 56 | |_ gtFine 57 | |_ ... 58 | |_ README.md 59 | ``` 60 | 61 | Create symlinks for `cityscapes`: 62 | 63 | ``` 64 | cd ~/github/maskrcnn-benchmark 65 | mkdir -p datasets/cityscapes 66 | ln -s /path/to/cityscapes datasets/cityscapes 67 | ``` 68 | 69 | ### Steps to convert Cityscapes Annotations to COCO Format 70 | 1. Download gtFine_trainvaltest.zip from https://www.cityscapes-dataset.com/downloads/ (login required) 71 | 2. Extract it to /path/to/gtFine_trainvaltest 72 | ``` 73 | cityscapes 74 | |_ gtFine_trainvaltest.zip 75 | |_ gtFine_trainvaltest 76 | |_ gtFine 77 | ``` 78 | 3. Run the commands below to convert the annotations 79 | 80 | ``` 81 | cd ~/github 82 | git clone https://github.com/mcordts/cityscapesScripts.git 83 | cd cityscapesScripts 84 | cp ~/github/maskrcnn-benchmark/tools/cityscapes/instances2dict_with_polygons.py cityscapesscripts/evaluation 85 | python setup.py install 86 | cd ~/github/maskrcnn-benchmark 87 | python tools/cityscapes/convert_cityscapes_to_coco.py --datadir /path/to/cityscapes --outdir /path/to/cityscapes/annotations 88 | ``` 89 | 90 | Example configuration files for Cityscapes can be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/cityscapes/). 91 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_data_loader 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.structures.image_list import to_image_list 3 | 4 | 5 | class BatchCollator(object): 6 | """ 7 | From a list of samples from the dataset, 8 | returns the batched images and targets. 9 | This should be passed to the DataLoader. 10 | """ 11 | 12 | def __init__(self, size_divisible=0): 13 | self.size_divisible = size_divisible 14 | 15 | def __call__(self, batch): 16 | transposed_batch = list(zip(*batch)) 17 | images = to_image_list(transposed_batch[0], self.size_divisible) 18 | targets = transposed_batch[1] 19 | img_ids = transposed_batch[2] 20 | return images, targets, img_ids 21 | 22 | 23 | class BBoxAugCollator(object): 24 | """ 25 | From a list of samples from the dataset, 26 | returns the images and targets. 27 | Images should be converted to batched images in `im_detect_bbox_aug`. 28 | """ 29 | 30 | def __call__(self, batch): 31 | return list(zip(*batch)) 32 | 33 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
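To make the collator contract above concrete, this is roughly how `BatchCollator` is wired into a `DataLoader`; the repo's `data/build.py` does this with values taken from the config, and `dataset` below is a placeholder for any dataset in this package:

```python
from torch.utils.data import DataLoader
from maskrcnn_benchmark.data.collate_batch import BatchCollator

collator = BatchCollator(size_divisible=32)  # pad H and W up to a multiple of 32 (FPN-friendly)
loader = DataLoader(dataset, batch_size=2, collate_fn=collator)  # 'dataset' stands in for a real dataset
for images, targets, img_ids in loader:
    # images is an ImageList (padded tensor plus per-image sizes),
    # targets is a tuple of BoxList, img_ids a tuple of dataset indices
    break
```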
2 | from .coco import COCODataset 3 | from .voc import PascalVOCDataset 4 | from .concat_dataset import ConcatDataset 5 | from .dota import DOTADataset 6 | 7 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset", "DOTADataset"] 8 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import torchvision 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask 7 | from maskrcnn_benchmark.structures.keypoint import PersonKeypoints 8 | 9 | 10 | min_keypoints_per_image = 10 11 | 12 | 13 | def _count_visible_keypoints(anno): 14 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 15 | 16 | 17 | def _has_only_empty_bbox(anno): 18 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 19 | 20 | 21 | def has_valid_annotation(anno): 22 | # if it's empty, there is no annotation 23 | if len(anno) == 0: 24 | return False 25 | # if all boxes have close to zero area, there is no annotation 26 | if _has_only_empty_bbox(anno): 27 | return False 28 | # keypoints task have a slight different critera for considering 29 | # if an annotation is valid 30 | if "keypoints" not in anno[0]: 31 | return True 32 | # for keypoint detection tasks, only consider valid images those 33 | # containing at least min_keypoints_per_image 34 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 35 | return True 36 | return False 37 | 38 | 39 | class COCODataset(torchvision.datasets.coco.CocoDetection): 40 | def __init__( 41 | self, ann_file, root, remove_images_without_annotations, transforms=None 42 | ): 43 | super(COCODataset, self).__init__(root, ann_file) 44 | # sort indices for reproducible results 45 | self.ids = sorted(self.ids) 46 | 47 | # filter images without detection annotations 48 | if remove_images_without_annotations: 49 | ids = [] 50 | for img_id in self.ids: 51 | ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None) 52 | anno = self.coco.loadAnns(ann_ids) 53 | if has_valid_annotation(anno): 54 | ids.append(img_id) 55 | self.ids = ids 56 | 57 | self.categories = {cat['id']: cat['name'] for cat in self.coco.cats.values()} 58 | 59 | self.json_category_id_to_contiguous_id = { 60 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 61 | } 62 | self.contiguous_category_id_to_json_id = { 63 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 64 | } 65 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 66 | self._transforms = transforms 67 | 68 | def __getitem__(self, idx): 69 | img, anno = super(COCODataset, self).__getitem__(idx) 70 | 71 | # filter crowd annotations 72 | # TODO might be better to add an extra field 73 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 74 | 75 | boxes = [obj["bbox"] for obj in anno] 76 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes 77 | target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") 78 | 79 | classes = [obj["category_id"] for obj in anno] 80 | classes = [self.json_category_id_to_contiguous_id[c] for c in classes] 81 | classes = torch.tensor(classes) 82 | target.add_field("labels", classes) 83 | 84 | if anno and "segmentation" in anno[0]: 85 | masks = [obj["segmentation"] for obj in anno] 86 | masks = 
SegmentationMask(masks, img.size, mode='poly') 87 | target.add_field("masks", masks) 88 | 89 | if anno and "keypoints" in anno[0]: 90 | keypoints = [obj["keypoints"] for obj in anno] 91 | keypoints = PersonKeypoints(keypoints, img.size) 92 | target.add_field("keypoints", keypoints) 93 | 94 | target = target.clip_to_image(remove_empty=True) 95 | 96 | if self._transforms is not None: 97 | img, target = self._transforms(img, target) 98 | 99 | return img, target, idx 100 | 101 | def get_img_info(self, index): 102 | img_id = self.id_to_img_map[index] 103 | img_data = self.coco.imgs[img_id] 104 | return img_data 105 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the sizes of the image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.data import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .voc import voc_evaluation 5 | from .dota import dota_evaluation 6 | 7 | 8 | def evaluate(dataset, predictions, output_folder, **kwargs): 9 | """evaluate dataset using different methods based on dataset type. 10 | Args: 11 | dataset: Dataset object 12 | predictions(list[BoxList]): each item in the list represents the 13 | prediction results for one image. 14 | output_folder: output folder, to save evaluation files or results. 15 | **kwargs: other args. 
16 | Returns: 17 | evaluation result 18 | """ 19 | args = dict( 20 | dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs 21 | ) 22 | if isinstance(dataset, datasets.COCODataset): 23 | return coco_evaluation(**args) 24 | elif isinstance(dataset, datasets.PascalVOCDataset): 25 | return voc_evaluation(**args) 26 | elif isinstance(dataset, datasets.DOTADataset): 27 | return dota_evaluation(**args) 28 | else: 29 | dataset_name = dataset.__class__.__name__ 30 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 31 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation 2 | 3 | 4 | def coco_evaluation( 5 | dataset, 6 | predictions, 7 | output_folder, 8 | box_only, 9 | iou_types, 10 | expected_results, 11 | expected_results_sigma_tol, 12 | ): 13 | return do_coco_evaluation( 14 | dataset=dataset, 15 | predictions=predictions, 16 | box_only=box_only, 17 | output_folder=output_folder, 18 | iou_types=iou_types, 19 | expected_results=expected_results, 20 | expected_results_sigma_tol=expected_results_sigma_tol, 21 | ) 22 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/dota/__init__.py: -------------------------------------------------------------------------------- 1 | from .dota_eval import do_dota_evaluation 2 | 3 | 4 | def dota_evaluation( 5 | dataset, 6 | predictions, 7 | output_folder, 8 | box_only, 9 | iou_types, 10 | expected_results, 11 | expected_results_sigma_tol, 12 | ): 13 | return do_dota_evaluation( 14 | dataset=dataset, 15 | predictions=predictions, 16 | box_only=box_only, 17 | output_folder=output_folder, 18 | iou_types=iou_types, 19 | expected_results=expected_results, 20 | expected_results_sigma_tol=expected_results_sigma_tol, 21 | ) 22 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/dota/dota_eval.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import tempfile 3 | import os 4 | import torch 5 | from collections import OrderedDict 6 | from tqdm import tqdm 7 | import cv2 8 | import numpy as np 9 | 10 | from maskrcnn_benchmark.modeling.roi_heads.mask_head.inference import Masker 11 | from maskrcnn_benchmark.structures.bounding_box import BoxList 12 | from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou 13 | from maskrcnn_benchmark.utils.poly_nms.poly_nms import poly_nms 14 | from maskrcnn_benchmark.config import cfg 15 | 16 | def write( output_folder, pred_dict ): 17 | output_folder_txt = os.path.join( output_folder, "results" ) 18 | if not os.path.exists( output_folder_txt ): 19 | os.mkdir( output_folder_txt ) 20 | for key in pred_dict: 21 | detections = pred_dict[key] 22 | output_path = os.path.join( output_folder, "Task1_" + key + ".txt") 23 | with open(output_path, "w") as f: 24 | for det in detections: 25 | row = '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format( 26 | det[0], det[1], 27 | det[2], det[3], 28 | det[4], det[5], 29 | det[6], det[7], 30 | det[8], det[9] 31 | ) 32 | f.write(row) 33 | 34 | def handle_ratio_prediction(prediction): 35 | hboxes = prediction.bbox.data.numpy() 36 | rboxes = prediction.get_field( "rboxes" ).data.numpy() 37 | ratios = 
prediction.get_field( "ratios" ).data.numpy() 38 | scores = prediction.get_field( "scores" ).data.numpy() 39 | labels = prediction.get_field( "labels" ).data.numpy() 40 | 41 | 42 | h_idx = np.where(ratios > 0.8)[0] 43 | h = hboxes[h_idx] 44 | hboxes_vtx = np.vstack( [h[:, 0], h[:, 1], h[:, 2], h[:, 1], h[:, 2], h[:, 3], h[:, 0], h[:, 3]] ).transpose((1,0)) 45 | rboxes[h_idx] = hboxes_vtx 46 | keep = poly_nms( np.hstack( [rboxes, scores[:, np.newaxis]] ).astype( np.double ), 0.1 ) 47 | 48 | rboxes = rboxes[keep].astype( np.int32 ) 49 | scores = scores[keep] 50 | labels = labels[keep] 51 | 52 | if len( rboxes ) > 0: 53 | rboxes = np.vstack( rboxes ) 54 | return rboxes, scores, labels 55 | else: 56 | return None, None, None 57 | 58 | def do_dota_evaluation( 59 | dataset, 60 | predictions, 61 | box_only, 62 | output_folder, 63 | iou_types, 64 | expected_results, 65 | expected_results_sigma_tol, 66 | ): 67 | pred_dict = {label:[] for label in dataset.categories.values()} 68 | for image_id, prediction in tqdm( enumerate(predictions) ): 69 | original_id = dataset.id_to_img_map[image_id] 70 | img_info = dataset.get_img_info(image_id) 71 | image_width = img_info["width"] 72 | image_height = img_info["height"] 73 | prediction = prediction.resize((image_width, image_height)) 74 | 75 | if cfg.MODEL.RATIO_ON: 76 | rboxes, scores, labels = handle_ratio_prediction(prediction) 77 | else: 78 | raise NotImplementedError 79 | if rboxes is None: 80 | continue 81 | 82 | # img_name = img_info["file_name"].split( "/" )[-1].split( "." )[0] 83 | img_name = os.path.basename( img_info["file_name"] )[:-4] 84 | 85 | for rbox, score, label in zip(rboxes, scores, labels): 86 | json_label = dataset.contiguous_category_id_to_json_id[label] 87 | json_label = dataset.categories[json_label] 88 | object_row = rbox.tolist() 89 | object_row.insert(0, score) 90 | object_row.insert(0, img_name) 91 | pred_dict[json_label].append(object_row) 92 | 93 | write( output_folder, pred_dict ) 94 | 95 | 96 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .voc_eval import do_voc_evaluation 4 | 5 | 6 | def voc_evaluation(dataset, predictions, output_folder, box_only, **_): 7 | logger = logging.getLogger("maskrcnn_benchmark.inference") 8 | if box_only: 9 | logger.warning("voc evaluation doesn't support box_only, ignored.") 10 | logger.info("performing voc evaluation, ignored iou_types.") 11 | return do_voc_evaluation( 12 | dataset=dataset, 13 | predictions=predictions, 14 | output_folder=output_folder, 15 | logger=logger, 16 | ) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/list_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
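The `hboxes_vtx` construction in `handle_ratio_prediction` above is the axis-aligned fallback: boxes whose predicted ratio exceeds 0.8 are treated as horizontal, and each `[x1, y1, x2, y2]` box is expanded into its four corners before polygon NMS. A tiny runnable illustration of that reshaping:

```python
import numpy as np

h = np.array([[10., 20., 50., 60.]])                 # one box: [x1, y1, x2, y2]
vtx = np.vstack([h[:, 0], h[:, 1], h[:, 2], h[:, 1],
                 h[:, 2], h[:, 3], h[:, 0], h[:, 3]]).transpose((1, 0))
print(vtx)  # [[10. 20. 50. 20. 50. 60. 10. 60.]] -- the four corners in clockwise order
```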
2 | """ 3 | Simple dataset class that wraps a list of path names 4 | """ 5 | 6 | from PIL import Image 7 | 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList 9 | 10 | 11 | class ListDataset(object): 12 | def __init__(self, image_lists, transforms=None): 13 | self.image_lists = image_lists 14 | self.transforms = transforms 15 | 16 | def __getitem__(self, item): 17 | img = Image.open(self.image_lists[item]).convert("RGB") 18 | 19 | # dummy target 20 | w, h = img.size 21 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 22 | 23 | if self.transforms is not None: 24 | img, target = self.transforms(img, target) 25 | 26 | return img, target 27 | 28 | def __len__(self): 29 | return len(self.image_lists) 30 | 31 | def get_img_info(self, item): 32 | """ 33 | Return the image dimensions for the image, without 34 | loading and pre-processing it 35 | """ 36 | pass 37 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | It is especially useful in conjunction with 13 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 14 | process can pass a DistributedSampler instance as a DataLoader sampler, 15 | and load a subset of the original dataset that is exclusive to it. 16 | .. note:: 17 | Dataset is assumed to be of constant size. 18 | Arguments: 19 | dataset: Dataset used for sampling. 20 | num_replicas (optional): Number of processes participating in 21 | distributed training. 22 | rank (optional): Rank of the current process within num_replicas. 
23 | """ 24 | 25 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 26 | if num_replicas is None: 27 | if not dist.is_available(): 28 | raise RuntimeError("Requires distributed package to be available") 29 | num_replicas = dist.get_world_size() 30 | if rank is None: 31 | if not dist.is_available(): 32 | raise RuntimeError("Requires distributed package to be available") 33 | rank = dist.get_rank() 34 | self.dataset = dataset 35 | self.num_replicas = num_replicas 36 | self.rank = rank 37 | self.epoch = 0 38 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 39 | self.total_size = self.num_samples * self.num_replicas 40 | self.shuffle = shuffle 41 | 42 | def __iter__(self): 43 | if self.shuffle: 44 | # deterministically shuffle based on epoch 45 | g = torch.Generator() 46 | g.manual_seed(self.epoch) 47 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 48 | else: 49 | indices = torch.arange(len(self.dataset)).tolist() 50 | 51 | # add extra samples to make it evenly divisible 52 | indices += indices[: (self.total_size - len(indices))] 53 | assert len(indices) == self.total_size 54 | 55 | # subsample 56 | offset = self.num_samples * self.rank 57 | indices = indices[offset : offset + self.num_samples] 58 | assert len(indices) == self.num_samples 59 | 60 | return iter(indices) 61 | 62 | def __len__(self): 63 | return self.num_samples 64 | 65 | def set_epoch(self, epoch): 66 | self.epoch = epoch 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
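The samplers above compose as layers: a base sampler orders indices, a batch sampler groups them, and `IterationBasedBatchSampler` re-cycles the result until a fixed iteration budget is reached. A sketch of that composition (the repo's `data/build.py` actually inserts `GroupedBatchSampler` to batch images of similar aspect ratio; `dataset` is a placeholder):

```python
from torch.utils.data.sampler import BatchSampler, RandomSampler
from maskrcnn_benchmark.data.samplers import IterationBasedBatchSampler

sampler = RandomSampler(dataset)       # or DistributedSampler(dataset) under DDP
batch_sampler = BatchSampler(sampler, batch_size=2, drop_last=False)
batch_sampler = IterationBasedBatchSampler(batch_sampler, num_iterations=90000)
# len(batch_sampler) == 90000; samplers exposing set_epoch (e.g. DistributedSampler)
# are reshuffled once per pass over the data
```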
2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | crop = cfg.INPUT.CROP_TRAIN 10 | rect = cfg.INPUT.RECT_TRAIN 11 | square_rotate = cfg.INPUT.SQUARE_ROTATE_TRAIN 12 | sort_vertices = cfg.INPUT.SORT_VERTICES 13 | rotate90_prob = cfg.INPUT.ROTATE90_PROB_TRAIN 14 | random_rotate_on = cfg.INPUT.RANDOM_ROTATE_ON 15 | flip_horizontal_prob = cfg.INPUT.FLIP_PROB_TRAIN 16 | flip_vertical_prob = cfg.INPUT.VERTICAL_FLIP_PROB_TRAIN 17 | brightness = cfg.INPUT.BRIGHTNESS 18 | contrast = cfg.INPUT.CONTRAST 19 | saturation = cfg.INPUT.SATURATION 20 | hue = cfg.INPUT.HUE 21 | else: 22 | min_size = cfg.INPUT.MIN_SIZE_TEST 23 | max_size = cfg.INPUT.MAX_SIZE_TEST 24 | crop = False 25 | rect = False 26 | square_rotate = False 27 | sort_vertices = False 28 | sort_quadio_vertices = False 29 | random_rotate_on = False 30 | rotate90_prob = 0.0 31 | flip_horizontal_prob = 0.0 32 | flip_vertical_prob = 0.0 33 | brightness = 0.0 34 | contrast = 0.0 35 | saturation = 0.0 36 | hue = 0.0 37 | 38 | to_bgr255 = cfg.INPUT.TO_BGR255 39 | normalize_transform = T.Normalize( 40 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 41 | ) 42 | color_jitter = T.ColorJitter( 43 | brightness=brightness, 44 | contrast=contrast, 45 | saturation=saturation, 46 | hue=hue, 47 | ) 48 | 49 | transform = T.Compose( 50 | [ 51 | color_jitter, 52 | # NOTE Mingtao 53 | # T.RandomSampleCrop( crop ), 54 | T.ToRect( rect ), 55 | T.RandomSquareRotate( square_rotate ), 56 | T.RandomRotate90( rotate90_prob ), 57 | T.RandomRotateAug(random_rotate_on), 58 | T.Resize(min_size, max_size), 59 | T.RandomHorizontalFlip(flip_horizontal_prob), 60 | T.RandomVerticalFlip(flip_vertical_prob), 61 | T.ToTensor(), 62 | normalize_transform, 63 | ], 64 | is_train 65 | ) 66 | return transform 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
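Outside the trainer, the pipeline returned by `build_transforms` can be applied directly; note that every transform in this package operates on an `(image, target)` pair, where `target` is a `BoxList`, rather than on a bare tensor. A sketch, with `pil_image` and `boxlist_target` as hypothetical placeholders:

```python
from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.data.transforms import build_transforms

cfg.merge_from_file("configs/glide/dota.yaml")   # any config defining the INPUT keys above
transform = build_transforms(cfg, is_train=False)
image_tensor, target = transform(pil_image, boxlist_target)  # placeholders, not defined here
```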
2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d 5 | from .misc import Conv2d 6 | from .misc import DFConv2d 7 | from .misc import ConvTranspose2d 8 | from .misc import BatchNorm2d 9 | from .misc import interpolate 10 | from .nms import nms 11 | from .psroi_align import PSROIAlign 12 | from .psroi_align import psroi_align 13 | from .roi_align import ROIAlign 14 | from .roi_align import roi_align 15 | from .roi_pool import ROIPool 16 | from .roi_pool import roi_pool 17 | from .smooth_l1_loss import smooth_l1_loss 18 | from .sigmoid_focal_loss import SigmoidFocalLoss 19 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 20 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, ModulatedDeformConvPack 21 | from .dcn.deform_pool_func import deform_roi_pooling 22 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack 23 | 24 | 25 | __all__ = [ 26 | "nms", 27 | "roi_align", 28 | "ROIAlign", 29 | "roi_pool", 30 | "ROIPool", 31 | "smooth_l1_loss", 32 | "Conv2d", 33 | "DFConv2d", 34 | "ConvTranspose2d", 35 | "interpolate", 36 | "BatchNorm2d", 37 | "FrozenBatchNorm2d", 38 | "SigmoidFocalLoss", 39 | 'deform_conv', 40 | 'modulated_deform_conv', 41 | 'DeformConv', 42 | 'ModulatedDeformConv', 43 | 'ModulatedDeformConvPack', 44 | 'deform_roi_pooling', 45 | 'DeformRoIPooling', 46 | 'DeformRoIPoolingPack', 47 | 'ModulatedDeformRoIPoolingPack', 48 | ] 49 | 50 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import glob 3 | import os.path 4 | 5 | import torch 6 | 7 | try: 8 | from torch.utils.cpp_extension import load as load_ext 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | except ImportError: 11 | raise ImportError("The cpp layer extensions requires PyTorch 0.4 or higher") 12 | 13 | 14 | def _load_C_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | this_dir = os.path.dirname(this_dir) 17 | this_dir = os.path.join(this_dir, "csrc") 18 | 19 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 22 | 23 | source = main_file + source_cpu 24 | 25 | extra_cflags = [] 26 | if torch.cuda.is_available() and CUDA_HOME is not None: 27 | source.extend(source_cuda) 28 | extra_cflags = ["-DWITH_CUDA"] 29 | source = [os.path.join(this_dir, s) for s in source] 30 | extra_include_paths = [this_dir] 31 | return load_ext( 32 | "torchvision", 33 | source, 34 | extra_cflags=extra_cflags, 35 | extra_include_paths=extra_include_paths, 36 | ) 37 | 38 | 39 | _C = _load_C_extensions() 40 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/batch_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | from torch import nn 4 | 5 | 6 | class FrozenBatchNorm2d(nn.Module): 7 | """ 8 | BatchNorm2d where the batch statistics and the affine parameters 9 | are fixed 10 | """ 11 | 12 | def __init__(self, n): 13 | super(FrozenBatchNorm2d, self).__init__() 14 | self.register_buffer("weight", torch.ones(n)) 15 | self.register_buffer("bias", torch.zeros(n)) 16 | self.register_buffer("running_mean", torch.zeros(n)) 17 | self.register_buffer("running_var", torch.ones(n)) 18 | 19 | def forward(self, x): 20 | # Cast all fixed parameters to half() if necessary 21 | if x.dtype == torch.float16: 22 | self.weight = self.weight.half() 23 | self.bias = self.bias.half() 24 | self.running_mean = self.running_mean.half() 25 | self.running_var = self.running_var.half() 26 | 27 | scale = self.weight * self.running_var.rsqrt() 28 | bias = self.bias - self.running_mean * scale 29 | scale = scale.reshape(1, -1, 1, 1) 30 | bias = bias.reshape(1, -1, 1, 1) 31 | return x * scale + bias 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn) 3 | # 4 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/dcn/deform_pool_func.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from maskrcnn_benchmark import _C 6 | 7 | 8 | class DeformRoIPoolingFunction(Function): 9 | 10 | @staticmethod 11 | def forward( 12 | ctx, 13 | data, 14 | rois, 15 | offset, 16 | spatial_scale, 17 | out_size, 18 | out_channels, 19 | no_trans, 20 | group_size=1, 21 | part_size=None, 22 | sample_per_part=4, 23 | trans_std=.0 24 | ): 25 | ctx.spatial_scale = spatial_scale 26 | ctx.out_size = out_size 27 | ctx.out_channels = out_channels 28 | ctx.no_trans = no_trans 29 | ctx.group_size = group_size 30 | ctx.part_size = out_size if part_size is None else part_size 31 | ctx.sample_per_part = sample_per_part 32 | ctx.trans_std = trans_std 33 | 34 | assert 0.0 <= ctx.trans_std <= 1.0 35 | if not data.is_cuda: 36 | raise NotImplementedError 37 | 38 | n = rois.shape[0] 39 | output = data.new_empty(n, out_channels, out_size, out_size) 40 | output_count = data.new_empty(n, out_channels, out_size, out_size) 41 | _C.deform_psroi_pooling_forward( 42 | data, 43 | rois, 44 | offset, 45 | output, 46 | output_count, 47 | ctx.no_trans, 48 | ctx.spatial_scale, 49 | ctx.out_channels, 50 | ctx.group_size, 51 | ctx.out_size, 52 | ctx.part_size, 53 | ctx.sample_per_part, 54 | ctx.trans_std 55 | ) 56 | 57 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 58 | ctx.save_for_backward(data, rois, offset) 59 | ctx.output_count = output_count 60 | 61 | return output 62 | 63 | @staticmethod 64 | @once_differentiable 65 | def backward(ctx, grad_output): 66 | if not grad_output.is_cuda: 67 | raise NotImplementedError 68 | 69 | data, rois, offset = ctx.saved_tensors 70 | output_count = ctx.output_count 71 | grad_input = torch.zeros_like(data) 72 | grad_rois = None 73 | grad_offset = torch.zeros_like(offset) 74 | 75 | _C.deform_psroi_pooling_backward( 76 | grad_output, 77 | data, 78 | rois, 79 | offset, 80 | output_count, 81 | grad_input, 82 | grad_offset, 83 | ctx.no_trans, 84 | 
ctx.spatial_scale, 85 | ctx.out_channels, 86 | ctx.group_size, 87 | ctx.out_size, 88 | ctx.part_size, 89 | ctx.sample_per_part, 90 | ctx.trans_std 91 | ) 92 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None) 93 | 94 | 95 | deform_roi_pooling = DeformRoIPoolingFunction.apply 96 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from maskrcnn_benchmark import _C 4 | 5 | from apex import amp 6 | 7 | # Only valid with fp32 inputs - give AMP the hint 8 | nms = amp.float_function(_C.nms) 9 | 10 | # nms.__doc__ = """ 11 | # This function performs Non-maximum suppresion""" 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/psroi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | from apex import amp 11 | 12 | class _PSROIAlign(Function): 13 | @staticmethod 14 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, out_dim): 15 | ctx.save_for_backward(roi) 16 | ctx.output_size = _pair(output_size) 17 | ctx.spatial_scale = spatial_scale 18 | ctx.sampling_ratio = sampling_ratio 19 | ctx.input_shape = input.size() 20 | ctx.out_dim = out_dim 21 | output = _C.psroi_align_forward( 22 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, out_dim 23 | ) 24 | return output 25 | 26 | @staticmethod 27 | @once_differentiable 28 | def backward(ctx, grad_output): 29 | rois, = ctx.saved_tensors 30 | output_size = ctx.output_size 31 | spatial_scale = ctx.spatial_scale 32 | sampling_ratio = ctx.sampling_ratio 33 | bs, ch, h, w = ctx.input_shape 34 | grad_input = _C.psroi_align_backward( 35 | grad_output, 36 | rois, 37 | spatial_scale, 38 | output_size[0], 39 | output_size[1], 40 | bs, 41 | ch, 42 | h, 43 | w, 44 | sampling_ratio, 45 | ctx.out_dim 46 | ) 47 | return grad_input, None, None, None, None, None 48 | 49 | 50 | psroi_align = _PSROIAlign.apply 51 | 52 | class PSROIAlign(nn.Module): 53 | def __init__(self, output_size, spatial_scale, sampling_ratio, out_dim): 54 | super(PSROIAlign, self).__init__() 55 | self.output_size = output_size 56 | self.spatial_scale = spatial_scale 57 | self.sampling_ratio = sampling_ratio 58 | self.out_dim = out_dim 59 | 60 | @amp.float_function 61 | def forward(self, input, rois): 62 | return psroi_align( 63 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.out_dim 64 | ) 65 | 66 | def __repr__(self): 67 | tmpstr = self.__class__.__name__ + "(" 68 | tmpstr += "output_size=" + str(self.output_size) 69 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 70 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 71 | tmpstr += ")" 72 | return tmpstr 73 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | from apex import amp 11 | 12 | class _ROIAlign(Function): 13 | @staticmethod 14 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 15 | ctx.save_for_backward(roi) 16 | ctx.output_size = _pair(output_size) 17 | ctx.spatial_scale = spatial_scale 18 | ctx.sampling_ratio = sampling_ratio 19 | ctx.input_shape = input.size() 20 | output = _C.roi_align_forward( 21 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 22 | ) 23 | return output 24 | 25 | @staticmethod 26 | @once_differentiable 27 | def backward(ctx, grad_output): 28 | rois, = ctx.saved_tensors 29 | output_size = ctx.output_size 30 | spatial_scale = ctx.spatial_scale 31 | sampling_ratio = ctx.sampling_ratio 32 | bs, ch, h, w = ctx.input_shape 33 | grad_input = _C.roi_align_backward( 34 | grad_output, 35 | rois, 36 | spatial_scale, 37 | output_size[0], 38 | output_size[1], 39 | bs, 40 | ch, 41 | h, 42 | w, 43 | sampling_ratio, 44 | ) 45 | return grad_input, None, None, None, None 46 | 47 | 48 | roi_align = _ROIAlign.apply 49 | 50 | class ROIAlign(nn.Module): 51 | def __init__(self, output_size, spatial_scale, sampling_ratio): 52 | super(ROIAlign, self).__init__() 53 | self.output_size = output_size 54 | self.spatial_scale = spatial_scale 55 | self.sampling_ratio = sampling_ratio 56 | 57 | @amp.float_function 58 | def forward(self, input, rois): 59 | return roi_align( 60 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 61 | ) 62 | 63 | def __repr__(self): 64 | tmpstr = self.__class__.__name__ + "(" 65 | tmpstr += "output_size=" + str(self.output_size) 66 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 67 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 68 | tmpstr += ")" 69 | return tmpstr 70 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
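A usage sketch for the `ROIAlign` module above, assuming the C extension (and apex) is built and installed: rois are `[batch_idx, x1, y1, x2, y2]` in input-image coordinates, and `spatial_scale` maps them onto the feature map, so 1/16 corresponds to a stride-16 feature level:

```python
import torch
from maskrcnn_benchmark.layers import ROIAlign

pooler = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=2)
features = torch.randn(1, 256, 50, 50)              # stride-16 feature map of an 800x800 image
rois = torch.tensor([[0., 32., 32., 256., 200.]])   # [batch_idx, x1, y1, x2, y2]
print(pooler(features, rois).shape)                 # torch.Size([1, 256, 7, 7])
```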
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | from apex import amp 11 | 12 | class _ROIPool(Function): 13 | @staticmethod 14 | def forward(ctx, input, roi, output_size, spatial_scale): 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.input_shape = input.size() 18 | output, argmax = _C.roi_pool_forward( 19 | input, roi, spatial_scale, output_size[0], output_size[1] 20 | ) 21 | ctx.save_for_backward(input, roi, argmax) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | input, rois, argmax = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | bs, ch, h, w = ctx.input_shape 31 | grad_input = _C.roi_pool_backward( 32 | grad_output, 33 | input, 34 | rois, 35 | argmax, 36 | spatial_scale, 37 | output_size[0], 38 | output_size[1], 39 | bs, 40 | ch, 41 | h, 42 | w, 43 | ) 44 | return grad_input, None, None, None 45 | 46 | 47 | roi_pool = _ROIPool.apply 48 | 49 | 50 | class ROIPool(nn.Module): 51 | def __init__(self, output_size, spatial_scale): 52 | super(ROIPool, self).__init__() 53 | self.output_size = output_size 54 | self.spatial_scale = spatial_scale 55 | 56 | @amp.float_function 57 | def forward(self, input, rois): 58 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 59 | 60 | def __repr__(self): 61 | tmpstr = self.__class__.__name__ + "(" 62 | tmpstr += "output_size=" + str(self.output_size) 63 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 64 | tmpstr += ")" 65 | return tmpstr 66 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from maskrcnn_benchmark import _C 7 | 8 | # TODO: Use JIT to replace CUDA implementation in the future. 
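Before the autograd plumbing below, it helps to see the arithmetic both the CUDA kernel and the CPU fallback compute: the focal loss FL(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t), with 1-based class labels and label 0 reserved for background. A quick numeric check in plain PyTorch, mirroring `sigmoid_focal_loss_cpu` further down:

```python
import torch

logits = torch.tensor([[2.0, -1.0]])   # one anchor, two foreground classes
targets = torch.tensor([1])            # 1-based class label; 0 would mean background
gamma, alpha = 2.0, 0.25

p = torch.sigmoid(logits)
class_range = torch.arange(1, logits.shape[1] + 1).unsqueeze(0)
t = targets.unsqueeze(1)
pos = (t == class_range).float()                # positive-class indicator
neg = ((t != class_range) & (t >= 0)).float()   # negatives (entries with t < 0 are ignored)
loss = -pos * alpha * (1 - p) ** gamma * torch.log(p) \
       - neg * (1 - alpha) * p ** gamma * torch.log(1 - p)
print(loss.sum())  # scalar loss for this anchor
```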
9 | class _SigmoidFocalLoss(Function): 10 | @staticmethod 11 | def forward(ctx, logits, targets, gamma, alpha): 12 | ctx.save_for_backward(logits, targets) 13 | num_classes = logits.shape[1] 14 | ctx.num_classes = num_classes 15 | ctx.gamma = gamma 16 | ctx.alpha = alpha 17 | 18 | losses = _C.sigmoid_focalloss_forward( 19 | logits, targets, num_classes, gamma, alpha 20 | ) 21 | return losses 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, d_loss): 26 | logits, targets = ctx.saved_tensors 27 | num_classes = ctx.num_classes 28 | gamma = ctx.gamma 29 | alpha = ctx.alpha 30 | d_loss = d_loss.contiguous() 31 | d_logits = _C.sigmoid_focalloss_backward( 32 | logits, targets, d_loss, num_classes, gamma, alpha 33 | ) 34 | return d_logits, None, None, None, None 35 | 36 | 37 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 38 | 39 | 40 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha): 41 | num_classes = logits.shape[1] 42 | gamma = gamma[0] 43 | alpha = alpha[0] 44 | dtype = targets.dtype 45 | device = targets.device 46 | class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0) 47 | 48 | t = targets.unsqueeze(1) 49 | p = torch.sigmoid(logits) 50 | term1 = (1 - p) ** gamma * torch.log(p) 51 | term2 = p ** gamma * torch.log(1 - p) 52 | return -(t == class_range).float() * term1 * alpha - ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 53 | 54 | 55 | class SigmoidFocalLoss(nn.Module): 56 | def __init__(self, gamma, alpha): 57 | super(SigmoidFocalLoss, self).__init__() 58 | self.gamma = gamma 59 | self.alpha = alpha 60 | 61 | def forward(self, logits, targets): 62 | device = logits.device 63 | if logits.is_cuda: 64 | loss_func = sigmoid_focal_loss_cuda 65 | else: 66 | loss_func = sigmoid_focal_loss_cpu 67 | 68 | loss = loss_func(logits, targets, self.gamma, self.alpha) 69 | return loss.sum() 70 | 71 | def __repr__(self): 72 | tmpstr = self.__class__.__name__ + "(" 73 | tmpstr += "gamma=" + str(self.gamma) 74 | tmpstr += ", alpha=" + str(self.alpha) 75 | tmpstr += ")" 76 | return tmpstr 77 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # TODO maybe push this to nn? 6 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 7 | """ 8 | very similar to the smooth_l1_loss from pytorch, but with 9 | the extra beta parameter 10 | """ 11 | n = torch.abs(input - target) 12 | cond = n < beta 13 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 14 | if size_average: 15 | return loss.mean() 16 | return loss.sum() 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MingtaoFu/gliding_vertex/c4470140265140e118725b80a81efe68e44e10af/maskrcnn_benchmark/modeling/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .backbone import build_backbone 3 | from . 
import fbnet 4 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | 4 | from torch import nn 5 | 6 | from maskrcnn_benchmark.modeling import registry 7 | from maskrcnn_benchmark.modeling.make_layers import conv_with_kaiming_uniform 8 | from . import fpn as fpn_module 9 | from . import resnet 10 | 11 | 12 | @registry.BACKBONES.register("R-50-C4") 13 | @registry.BACKBONES.register("R-50-C5") 14 | @registry.BACKBONES.register("R-101-C4") 15 | @registry.BACKBONES.register("R-101-C5") 16 | @registry.BACKBONES.register("R-101-C4-C5") 17 | @registry.BACKBONES.register("R-50-C4-C5") 18 | def build_resnet_backbone(cfg): 19 | body = resnet.ResNet(cfg) 20 | model = nn.Sequential(OrderedDict([("body", body)])) 21 | model.out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 22 | return model 23 | 24 | 25 | @registry.BACKBONES.register("R-50-FPN") 26 | @registry.BACKBONES.register("R-101-FPN") 27 | @registry.BACKBONES.register("R-152-FPN") 28 | def build_resnet_fpn_backbone(cfg): 29 | body = resnet.ResNet(cfg) 30 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 31 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 32 | fpn = fpn_module.FPN( 33 | in_channels_list=[ 34 | in_channels_stage2, 35 | in_channels_stage2 * 2, 36 | in_channels_stage2 * 4, 37 | in_channels_stage2 * 8, 38 | ], 39 | out_channels=out_channels, 40 | conv_block=conv_with_kaiming_uniform( 41 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 42 | ), 43 | top_blocks=fpn_module.LastLevelMaxPool(), 44 | ) 45 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 46 | model.out_channels = out_channels 47 | return model 48 | 49 | 50 | @registry.BACKBONES.register("R-50-FPN-RETINANET") 51 | @registry.BACKBONES.register("R-101-FPN-RETINANET") 52 | def build_resnet_fpn_p3p7_backbone(cfg): 53 | body = resnet.ResNet(cfg) 54 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 55 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 56 | in_channels_p6p7 = in_channels_stage2 * 8 if cfg.MODEL.RETINANET.USE_C5 \ 57 | else out_channels 58 | fpn = fpn_module.FPN( 59 | in_channels_list=[ 60 | 0, 61 | in_channels_stage2 * 2, 62 | in_channels_stage2 * 4, 63 | in_channels_stage2 * 8, 64 | ], 65 | out_channels=out_channels, 66 | conv_block=conv_with_kaiming_uniform( 67 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 68 | ), 69 | top_blocks=fpn_module.LastLevelP6P7(in_channels_p6p7, out_channels), 70 | ) 71 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 72 | model.out_channels = out_channels 73 | return model 74 | 75 | 76 | def build_backbone(cfg): 77 | assert cfg.MODEL.BACKBONE.CONV_BODY in registry.BACKBONES, \ 78 | "cfg.MODEL.BACKBONE.CONV_BODY: {} are not registered in registry".format( 79 | cfg.MODEL.BACKBONE.CONV_BODY 80 | ) 81 | return registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY](cfg) 82 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
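# --- Sketch of extending the backbone registry above (hypothetical backbone,
# not in this repo): a new conv body only needs to be registered under a name
# and to expose `out_channels`; build_backbone then resolves it from
# cfg.MODEL.BACKBONE.CONV_BODY.
from collections import OrderedDict
from torch import nn
from maskrcnn_benchmark.modeling import registry

@registry.BACKBONES.register("TOY-BACKBONE")
def build_toy_backbone(cfg):
    body = nn.Conv2d(3, 64, 3, stride=2, padding=1)
    model = nn.Sequential(OrderedDict([("body", body)]))
    model.out_channels = 64  # the RPN / ROI heads read this attribute
    return model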
2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | 7 | class FPN(nn.Module): 8 | """ 9 | Module that adds FPN on top of a list of feature maps. 10 | The feature maps are currently supposed to be in increasing depth 11 | order, and must be consecutive 12 | """ 13 | 14 | def __init__( 15 | self, in_channels_list, out_channels, conv_block, top_blocks=None 16 | ): 17 | """ 18 | Arguments: 19 | in_channels_list (list[int]): number of channels for each feature map that 20 | will be fed 21 | out_channels (int): number of channels of the FPN representation 22 | top_blocks (nn.Module or None): if provided, an extra operation will 23 | be performed on the output of the last (smallest resolution) 24 | FPN output, and the result will extend the result list 25 | """ 26 | super(FPN, self).__init__() 27 | self.inner_blocks = [] 28 | self.layer_blocks = [] 29 | for idx, in_channels in enumerate(in_channels_list, 1): 30 | inner_block = "fpn_inner{}".format(idx) 31 | layer_block = "fpn_layer{}".format(idx) 32 | 33 | if in_channels == 0: 34 | continue 35 | inner_block_module = conv_block(in_channels, out_channels, 1) 36 | layer_block_module = conv_block(out_channels, out_channels, 3, 1) 37 | self.add_module(inner_block, inner_block_module) 38 | self.add_module(layer_block, layer_block_module) 39 | self.inner_blocks.append(inner_block) 40 | self.layer_blocks.append(layer_block) 41 | self.top_blocks = top_blocks 42 | 43 | def forward(self, x): 44 | """ 45 | Arguments: 46 | x (list[Tensor]): feature maps for each feature level. 47 | Returns: 48 | results (tuple[Tensor]): feature maps after FPN layers. 49 | They are ordered from highest resolution first. 50 | """ 51 | last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) 52 | results = [] 53 | results.append(getattr(self, self.layer_blocks[-1])(last_inner)) 54 | for feature, inner_block, layer_block in zip( 55 | x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1] 56 | ): 57 | if not inner_block: 58 | continue 59 | inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") 60 | inner_lateral = getattr(self, inner_block)(feature) 61 | # TODO use size instead of scale to make it robust to different sizes 62 | # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:], 63 | # mode='bilinear', align_corners=False) 64 | last_inner = inner_lateral + inner_top_down 65 | results.insert(0, getattr(self, layer_block)(last_inner)) 66 | 67 | if isinstance(self.top_blocks, LastLevelP6P7): 68 | last_results = self.top_blocks(x[-1], results[-1]) 69 | results.extend(last_results) 70 | elif isinstance(self.top_blocks, LastLevelMaxPool): 71 | last_results = self.top_blocks(results[-1]) 72 | results.extend(last_results) 73 | 74 | return tuple(results) 75 | 76 | 77 | class LastLevelMaxPool(nn.Module): 78 | def forward(self, x): 79 | return [F.max_pool2d(x, 1, 2, 0)] 80 | 81 | 82 | class LastLevelP6P7(nn.Module): 83 | """ 84 | This module is used in RetinaNet to generate extra layers, P6 and P7. 
85 | """ 86 | def __init__(self, in_channels, out_channels): 87 | super(LastLevelP6P7, self).__init__() 88 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 89 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 90 | for module in [self.p6, self.p7]: 91 | nn.init.kaiming_uniform_(module.weight, a=1) 92 | nn.init.constant_(module.bias, 0) 93 | self.use_P5 = in_channels == out_channels 94 | 95 | def forward(self, c5, p5): 96 | x = p5 if self.use_P5 else c5 97 | p6 = self.p6(x) 98 | p7 = self.p7(F.relu(p6)) 99 | return [p6, p7] 100 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # NOTE Mingtao 6 | class OHEMSampler( object ): 7 | def __init__(self, batch_size_per_image, positive_fraction): 8 | """ 9 | Arguments: 10 | batch_size_per_image (int): number of elements to be selected per image 11 | positive_fraction (float): percentage of positive elements per batch 12 | """ 13 | self.batch_size_per_image = batch_size_per_image 14 | self.positive_fraction = positive_fraction 15 | 16 | def __call__( self, matched_idxs ): 17 | pos_idx = [] 18 | neg_idx = [] 19 | 20 | 21 | 22 | class BalancedPositiveNegativeSampler(object): 23 | """ 24 | This class samples batches, ensuring that they contain a fixed proportion of positives 25 | """ 26 | 27 | def __init__(self, batch_size_per_image, positive_fraction): 28 | """ 29 | Arguments: 30 | batch_size_per_image (int): number of elements to be selected per image 31 | positive_fraction (float): percentage of positive elements per batch 32 | """ 33 | self.batch_size_per_image = batch_size_per_image 34 | self.positive_fraction = positive_fraction 35 | 36 | def __call__(self, matched_idxs): 37 | """ 38 | Arguments: 39 | matched idxs: list of tensors containing -1, 0 or positive values. 40 | Each tensor corresponds to a specific image. 41 | -1 values are ignored, 0 are considered as negatives and > 0 as 42 | positives. 43 | 44 | Returns: 45 | pos_idx (list[tensor]) 46 | neg_idx (list[tensor]) 47 | 48 | Returns two lists of binary masks for each image. 49 | The first list contains the positive elements that were selected, 50 | and the second list the negative example. 
51 | """ 52 | pos_idx = [] 53 | neg_idx = [] 54 | for matched_idxs_per_image in matched_idxs: 55 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 56 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 57 | 58 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 59 | # protect against not enough positive examples 60 | num_pos = min(positive.numel(), num_pos) 61 | num_neg = self.batch_size_per_image - num_pos 62 | # protect against not enough negative examples 63 | num_neg = min(negative.numel(), num_neg) 64 | # print(num_pos, num_neg) 65 | 66 | # randomly select positive and negative examples 67 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 68 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 69 | 70 | pos_idx_per_image = positive[perm1] 71 | neg_idx_per_image = negative[perm2] 72 | 73 | # create binary mask from indices 74 | pos_idx_per_image_mask = torch.zeros_like( 75 | matched_idxs_per_image, dtype=torch.uint8 76 | ) 77 | neg_idx_per_image_mask = torch.zeros_like( 78 | matched_idxs_per_image, dtype=torch.uint8 79 | ) 80 | pos_idx_per_image_mask[pos_idx_per_image] = 1 81 | neg_idx_per_image_mask[neg_idx_per_image] = 1 82 | 83 | pos_idx.append(pos_idx_per_image_mask) 84 | neg_idx.append(neg_idx_per_image_mask) 85 | 86 | return pos_idx, neg_idx 87 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .detectors import build_detection_model 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/detectors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .generalized_rcnn import GeneralizedRCNN 3 | 4 | 5 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN} 6 | 7 | 8 | def build_detection_model(cfg): 9 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 10 | return meta_arch(cfg) 11 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/generalized_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Implements the Generalized R-CNN framework 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | 9 | from maskrcnn_benchmark.structures.image_list import to_image_list 10 | 11 | from ..backbone import build_backbone 12 | from ..rpn.rpn import build_rpn 13 | from ..roi_heads.roi_heads import build_roi_heads 14 | 15 | 16 | class GeneralizedRCNN(nn.Module): 17 | """ 18 | Main class for Generalized R-CNN. Currently supports boxes and masks. 19 | It consists of three main parts: 20 | - backbone 21 | - rpn 22 | - heads: takes the features + the proposals from the RPN and computes 23 | detections / masks from it. 
24 | """ 25 | 26 | def __init__(self, cfg): 27 | super(GeneralizedRCNN, self).__init__() 28 | 29 | self.backbone = build_backbone(cfg) 30 | self.rpn = build_rpn(cfg, self.backbone.out_channels) 31 | roi_head_channels = self.backbone.out_channels * (2 if "C4-C5" in cfg.MODEL.BACKBONE.CONV_BODY else 1) 32 | self.roi_heads = build_roi_heads(cfg, roi_head_channels) 33 | 34 | def forward(self, images, targets=None): 35 | """ 36 | Arguments: 37 | images (list[Tensor] or ImageList): images to be processed 38 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 39 | 40 | Returns: 41 | result (list[BoxList] or dict[Tensor]): the output from the model. 42 | During training, it returns a dict[Tensor] which contains the losses. 43 | During testing, it returns list[BoxList] contains additional fields 44 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 45 | 46 | """ 47 | if self.training and targets is None: 48 | raise ValueError("In training mode, targets should be passed") 49 | images = to_image_list(images) 50 | features = self.backbone(images.tensors) 51 | proposals, proposal_losses = self.rpn(images, features, targets) 52 | if self.roi_heads: 53 | x, result, detector_losses = self.roi_heads(features, proposals, targets) 54 | else: 55 | # RPN-only models don't have roi_heads 56 | x = features 57 | result = proposals 58 | detector_losses = {} 59 | 60 | if self.training: 61 | losses = {} 62 | losses.update(detector_losses) 63 | losses.update(proposal_losses) 64 | return losses 65 | 66 | return result 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/make_layers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | from torch.nn import functional as F 9 | from maskrcnn_benchmark.config import cfg 10 | from maskrcnn_benchmark.layers import Conv2d 11 | from maskrcnn_benchmark.modeling.poolers import Pooler 12 | 13 | 14 | def get_group_gn(dim, dim_per_gp, num_groups): 15 | """get number of groups used by GroupNorm, based on number of channels.""" 16 | assert dim_per_gp == -1 or num_groups == -1, \ 17 | "GroupNorm: can only specify G or C/G." 
18 | 19 | if dim_per_gp > 0: 20 | assert dim % dim_per_gp == 0, \ 21 | "dim: {}, dim_per_gp: {}".format(dim, dim_per_gp) 22 | group_gn = dim // dim_per_gp 23 | else: 24 | assert dim % num_groups == 0, \ 25 | "dim: {}, num_groups: {}".format(dim, num_groups) 26 | group_gn = num_groups 27 | 28 | return group_gn 29 | 30 | 31 | def group_norm(out_channels, affine=True, divisor=1): 32 | out_channels = out_channels // divisor 33 | dim_per_gp = cfg.MODEL.GROUP_NORM.DIM_PER_GP // divisor 34 | num_groups = cfg.MODEL.GROUP_NORM.NUM_GROUPS // divisor 35 | eps = cfg.MODEL.GROUP_NORM.EPSILON # default: 1e-5 36 | return torch.nn.GroupNorm( 37 | get_group_gn(out_channels, dim_per_gp, num_groups), 38 | out_channels, 39 | eps, 40 | affine 41 | ) 42 | 43 | 44 | def make_conv3x3( 45 | in_channels, 46 | out_channels, 47 | dilation=1, 48 | stride=1, 49 | use_gn=False, 50 | use_relu=False, 51 | kaiming_init=True 52 | ): 53 | conv = Conv2d( 54 | in_channels, 55 | out_channels, 56 | kernel_size=3, 57 | stride=stride, 58 | padding=dilation, 59 | dilation=dilation, 60 | bias=False if use_gn else True 61 | ) 62 | if kaiming_init: 63 | nn.init.kaiming_normal_( 64 | conv.weight, mode="fan_out", nonlinearity="relu" 65 | ) 66 | else: 67 | torch.nn.init.normal_(conv.weight, std=0.01) 68 | if not use_gn: 69 | nn.init.constant_(conv.bias, 0) 70 | module = [conv,] 71 | if use_gn: 72 | module.append(group_norm(out_channels)) 73 | if use_relu: 74 | module.append(nn.ReLU(inplace=True)) 75 | if len(module) > 1: 76 | return nn.Sequential(*module) 77 | return conv 78 | 79 | 80 | def make_fc(dim_in, hidden_dim, use_gn=False): 81 | ''' 82 | Caffe2 implementation uses XavierFill, which in fact 83 | corresponds to kaiming_uniform_ in PyTorch 84 | ''' 85 | if use_gn: 86 | fc = nn.Linear(dim_in, hidden_dim, bias=False) 87 | nn.init.kaiming_uniform_(fc.weight, a=1) 88 | return nn.Sequential(fc, group_norm(hidden_dim)) 89 | fc = nn.Linear(dim_in, hidden_dim) 90 | nn.init.kaiming_uniform_(fc.weight, a=1) 91 | nn.init.constant_(fc.bias, 0) 92 | return fc 93 | 94 | 95 | def conv_with_kaiming_uniform(use_gn=False, use_relu=False): 96 | def make_conv( 97 | in_channels, out_channels, kernel_size, stride=1, dilation=1 98 | ): 99 | conv = Conv2d( 100 | in_channels, 101 | out_channels, 102 | kernel_size=kernel_size, 103 | stride=stride, 104 | padding=dilation * (kernel_size - 1) // 2, 105 | dilation=dilation, 106 | bias=False if use_gn else True 107 | ) 108 | # Caffe2 implementation uses XavierFill, which in fact 109 | # corresponds to kaiming_uniform_ in PyTorch 110 | nn.init.kaiming_uniform_(conv.weight, a=1) 111 | if not use_gn: 112 | nn.init.constant_(conv.bias, 0) 113 | module = [conv,] 114 | if use_gn: 115 | module.append(group_norm(out_channels)) 116 | if use_relu: 117 | module.append(nn.ReLU(inplace=True)) 118 | if len(module) > 1: 119 | return nn.Sequential(*module) 120 | return conv 121 | 122 | return make_conv 123 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
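# --- Usage sketch for the make_layers helpers above (shapes illustrative):
# both return modules that are already initialized the Caffe2-compatible way.
import torch
from maskrcnn_benchmark.modeling.make_layers import make_conv3x3, make_fc

conv = make_conv3x3(256, 256, use_relu=True)      # Conv2d + ReLU, kaiming init
fc = make_fc(256 * 7 * 7, 1024)                   # Linear, kaiming_uniform_ init
y = conv(torch.randn(1, 256, 14, 14))             # -> (1, 256, 14, 14)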
2 | 3 | from maskrcnn_benchmark.utils.registry import Registry 4 | 5 | BACKBONES = Registry() 6 | RPN_HEADS = Registry() 7 | ROI_BOX_FEATURE_EXTRACTORS = Registry() 8 | ROI_BOX_PREDICTOR = Registry() 9 | ROI_KEYPOINT_FEATURE_EXTRACTORS = Registry() 10 | ROI_KEYPOINT_PREDICTOR = Registry() 11 | ROI_MASK_FEATURE_EXTRACTORS = Registry() 12 | ROI_MASK_PREDICTOR = Registry() 13 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MingtaoFu/gliding_vertex/c4470140265140e118725b80a81efe68e44e10af/maskrcnn_benchmark/modeling/roi_heads/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MingtaoFu/gliding_vertex/c4470140265140e118725b80a81efe68e44e10af/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 6 | from .roi_box_predictors import make_roi_box_predictor 7 | from .inference import make_roi_box_post_processor 8 | from .loss import make_roi_box_loss_evaluator 9 | 10 | 11 | class ROIBoxHead(torch.nn.Module): 12 | """ 13 | Generic Box Head class. 14 | """ 15 | 16 | def __init__(self, cfg, in_channels): 17 | super(ROIBoxHead, self).__init__() 18 | self.feature_extractor = make_roi_box_feature_extractor(cfg, in_channels) 19 | self.predictor = make_roi_box_predictor( 20 | cfg, self.feature_extractor.out_channels) 21 | self.post_processor = make_roi_box_post_processor(cfg) 22 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 23 | 24 | def forward(self, features, proposals, targets=None): 25 | """ 26 | Arguments: 27 | features (list[Tensor]): feature-maps from possibly several levels 28 | proposals (list[BoxList]): proposal boxes 29 | targets (list[BoxList], optional): the ground-truth targets. 30 | 31 | Returns: 32 | x (Tensor): the result of the feature extractor 33 | proposals (list[BoxList]): during training, the subsampled proposals 34 | are returned. During testing, the predicted boxlists are returned 35 | losses (dict[Tensor]): During training, returns the losses for the 36 | head. During testing, returns an empty dict. 37 | """ 38 | 39 | if self.training: 40 | # Faster R-CNN subsamples during training the proposals with a fixed 41 | # positive / negative ratio 42 | with torch.no_grad(): 43 | proposals = self.loss_evaluator.subsample(proposals, targets) 44 | 45 | # extract features that will be fed to the final classifier. 
The 46 | # feature_extractor generally corresponds to the pooler + heads 47 | x = self.feature_extractor(features, proposals) 48 | # final classifier that converts the features into predictions 49 | class_logits, box_regression = self.predictor(x) 50 | 51 | if not self.training: 52 | result = self.post_processor((class_logits, box_regression), proposals) 53 | return x, result, {} 54 | 55 | loss_classifier, loss_box_reg = self.loss_evaluator( 56 | [class_logits], [box_regression] 57 | ) 58 | return ( 59 | x, 60 | proposals, 61 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 62 | ) 63 | 64 | 65 | def build_roi_box_head(cfg, in_channels): 66 | """ 67 | Constructs a new box head. 68 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 69 | and make it a parameter in the config 70 | """ 71 | return ROIBoxHead(cfg, in_channels) 72 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/ratio_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 6 | from .roi_box_predictors import make_roi_box_predictor 7 | from .inference import make_roi_ratio_post_processor 8 | from .loss import make_roi_ratio_loss_evaluator 9 | from maskrcnn_benchmark.modeling.utils import cat 10 | import torch.nn.functional as F 11 | from maskrcnn_benchmark.layers import smooth_l1_loss 12 | 13 | 14 | class ROIRatioHead(torch.nn.Module): 15 | 16 | def __init__(self, cfg, in_channels): 17 | super(ROIRatioHead, self).__init__() 18 | self.feature_extractor = make_roi_box_feature_extractor(cfg, in_channels) 19 | self.predictor = make_roi_box_predictor( 20 | cfg, self.feature_extractor.out_channels) 21 | self.post_processor = make_roi_ratio_post_processor(cfg) 22 | self.loss_evaluator = make_roi_ratio_loss_evaluator(cfg) 23 | 24 | def forward(self, features, proposals, targets=None): 25 | """ 26 | Arguments: 27 | features (list[Tensor]): feature-maps from possibly several levels 28 | proposals (list[BoxList]): proposal boxes 29 | targets (list[BoxList], optional): the ground-truth targets. 30 | 31 | Returns: 32 | x (Tensor): the result of the feature extractor 33 | proposals (list[BoxList]): during training, the subsampled proposals 34 | are returned. During testing, the predicted boxlists are returned 35 | losses (dict[Tensor]): During training, returns the losses for the 36 | head. During testing, returns an empty dict. 37 | """ 38 | 39 | if self.training: 40 | # Faster R-CNN subsamples during training the proposals with a fixed 41 | # positive / negative ratio 42 | with torch.no_grad(): 43 | proposals = self.loss_evaluator.subsample(proposals, targets) 44 | 45 | # extract features that will be fed to the final classifier. 
The 46 | # feature_extractor generally corresponds to the pooler + heads 47 | x = self.feature_extractor(features, proposals) 48 | # final classifier that converts the features into predictions 49 | class_logits, box_reg, fix_reg, ratio_reg = self.predictor(x) 50 | 51 | if not self.training: 52 | result = self.post_processor( 53 | (class_logits, box_reg, fix_reg, ratio_reg), proposals 54 | ) 55 | return x, result, {} 56 | 57 | loss_classifier, loss_box_reg, loss_fix_reg, loss_ratio_reg = self.loss_evaluator( 58 | [class_logits], [box_reg], [fix_reg], [ratio_reg] 59 | ) 60 | 61 | return ( 62 | x, 63 | proposals, 64 | dict( 65 | loss_classifier=loss_classifier, 66 | loss_box_reg=loss_box_reg, 67 | loss_fix_reg=loss_fix_reg, 68 | loss_ratio_reg=loss_ratio_reg 69 | ), 70 | ) 71 | 72 | 73 | def build_roi_ratio_head(cfg, in_channels): 74 | """ 75 | Constructs a new ratio head. 76 | By default, uses ROIRatioHead, but if it turns out not to be enough, just register a new class 77 | and make it a parameter in the config 78 | """ 79 | return ROIRatioHead(cfg, in_channels) 80 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.modeling import registry 3 | from torch import nn 4 | import torch.nn.functional as F 5 | 6 | 7 | @registry.ROI_BOX_PREDICTOR.register("FastRCNNPredictor") 8 | class FastRCNNPredictor(nn.Module): 9 | def __init__(self, config, in_channels): 10 | super(FastRCNNPredictor, self).__init__() 11 | assert in_channels is not None 12 | 13 | num_inputs = in_channels 14 | 15 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 16 | self.avgpool = nn.AdaptiveAvgPool2d(1) 17 | self.cls_score = nn.Linear(num_inputs, num_classes) 18 | num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 19 | self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4) 20 | 21 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 22 | nn.init.constant_(self.cls_score.bias, 0) 23 | 24 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 25 | nn.init.constant_(self.bbox_pred.bias, 0) 26 | 27 | def forward(self, x): 28 | if len( x.shape ) == 4: 29 | x = self.avgpool(x) 30 | x = x.view(x.size(0), -1) 31 | cls_logit = self.cls_score(x) 32 | bbox_pred = self.bbox_pred(x) 33 | return cls_logit, bbox_pred 34 | 35 | 36 | @registry.ROI_BOX_PREDICTOR.register("FPNPredictor") 37 | class FPNPredictor(nn.Module): 38 | def __init__(self, cfg, in_channels): 39 | super(FPNPredictor, self).__init__() 40 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 41 | representation_size = in_channels 42 | 43 | self.cls_score = nn.Linear(representation_size, num_classes) 44 | num_bbox_reg_classes = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 45 | self.bbox_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4) 46 | 47 | nn.init.normal_(self.cls_score.weight, std=0.01) 48 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 49 | for l in [self.cls_score, self.bbox_pred]: 50 | nn.init.constant_(l.bias, 0) 51 | 52 | def forward(self, x): 53 | if x.ndimension() == 4: 54 | assert list(x.shape[2:]) == [1, 1] 55 | x = x.view(x.size(0), -1) 56 | scores = self.cls_score(x) 57 | bbox_deltas = self.bbox_pred(x) 58 | 59 | return scores, bbox_deltas 60 | 61 | # NOTE Mingtao 62 | 
@registry.ROI_BOX_PREDICTOR.register("FPNRatioPredictor") 63 | @registry.ROI_BOX_PREDICTOR.register("RatioPredictor") 64 | class FPNRatioPredictor(nn.Module): 65 | def __init__(self, cfg, in_channels): 66 | super(FPNRatioPredictor, self).__init__() 67 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 68 | representation_size = in_channels 69 | 70 | self.cls_score = nn.Linear(representation_size, num_classes) 71 | num_bbox_reg_classes = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 72 | self.bbox_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4) 73 | self.fix_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4) 74 | self.ratio_pred = nn.Linear(representation_size, num_bbox_reg_classes * 1) 75 | 76 | nn.init.normal_(self.cls_score.weight, std=0.01) 77 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 78 | nn.init.normal_(self.fix_pred.weight, std=0.001) 79 | nn.init.normal_(self.ratio_pred.weight, std=0.001) 80 | for l in [self.cls_score, self.bbox_pred, self.fix_pred, self.ratio_pred]: 81 | nn.init.constant_(l.bias, 0) 82 | 83 | def forward(self, x): 84 | if x.ndimension() == 4: 85 | assert list(x.shape[2:]) == [1, 1] 86 | x = x.view(x.size(0), -1) 87 | scores = self.cls_score(x) 88 | bbox_deltas = self.bbox_pred(x) 89 | fixes = self.fix_pred(x) 90 | fixes = F.sigmoid( fixes ) 91 | ratios = self.ratio_pred(x) 92 | ratios = F.sigmoid( ratios ) 93 | 94 | return scores, bbox_deltas, fixes, ratios 95 | 96 | def make_roi_box_predictor(cfg, in_channels): 97 | func = registry.ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 98 | return func(cfg, in_channels) 99 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MingtaoFu/gliding_vertex/c4470140265140e118725b80a81efe68e44e10af/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor 4 | from .roi_keypoint_predictors import make_roi_keypoint_predictor 5 | from .inference import make_roi_keypoint_post_processor 6 | from .loss import make_roi_keypoint_loss_evaluator 7 | 8 | 9 | class ROIKeypointHead(torch.nn.Module): 10 | def __init__(self, cfg, in_channels): 11 | super(ROIKeypointHead, self).__init__() 12 | self.cfg = cfg.clone() 13 | self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels) 14 | self.predictor = make_roi_keypoint_predictor( 15 | cfg, self.feature_extractor.out_channels) 16 | self.post_processor = make_roi_keypoint_post_processor(cfg) 17 | self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg) 18 | 19 | def forward(self, features, proposals, targets=None): 20 | """ 21 | Arguments: 22 | features (list[Tensor]): feature-maps from possibly several levels 23 | proposals (list[BoxList]): proposal boxes 24 | targets (list[BoxList], optional): the ground-truth targets. 25 | 26 | Returns: 27 | x (Tensor): the result of the feature extractor 28 | proposals (list[BoxList]): during training, the original proposals 29 | are returned. 
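# --- Sketch of what the `fixes`/`ratios` heads above encode, following the
# gliding-vertex formulation (the repo's real decoding lives in
# box_head/inference.py; names here are illustrative). Each corner of the
# horizontal box glides along one side to form the oriented quadrilateral:
def decode_quad(hbox, fix):
    # hbox: (x1, y1, x2, y2); fix: four sigmoid outputs in [0, 1]
    x1, y1, x2, y2 = hbox
    w, h = x2 - x1, y2 - y1
    return [
        (x1 + fix[0] * w, y1),  # top-left corner glides right along the top edge
        (x2, y1 + fix[1] * h),  # top-right glides down the right edge
        (x2 - fix[2] * w, y2),  # bottom-right glides left along the bottom edge
        (x1, y2 - fix[3] * h),  # bottom-left glides up the left edge
    ]
# `ratios` approximates quad_area / hbox_area: values near 1 suggest a nearly
# axis-aligned object, so inference can fall back to the horizontal box.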
During testing, the predicted boxlists are returned 30 | with the `mask` field set 31 | losses (dict[Tensor]): During training, returns the losses for the 32 | head. During testing, returns an empty dict. 33 | """ 34 | if self.training: 35 | with torch.no_grad(): 36 | proposals = self.loss_evaluator.subsample(proposals, targets) 37 | 38 | x = self.feature_extractor(features, proposals) 39 | kp_logits = self.predictor(x) 40 | 41 | if not self.training: 42 | result = self.post_processor(kp_logits, proposals) 43 | return x, result, {} 44 | 45 | loss_kp = self.loss_evaluator(proposals, kp_logits) 46 | 47 | return x, proposals, dict(loss_kp=loss_kp) 48 | 49 | 50 | def build_roi_keypoint_head(cfg, in_channels): 51 | return ROIKeypointHead(cfg, in_channels) 52 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from maskrcnn_benchmark.modeling import registry 5 | from maskrcnn_benchmark.modeling.poolers import Pooler 6 | 7 | from maskrcnn_benchmark.layers import Conv2d 8 | 9 | 10 | @registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor") 11 | class KeypointRCNNFeatureExtractor(nn.Module): 12 | def __init__(self, cfg, in_channels): 13 | super(KeypointRCNNFeatureExtractor, self).__init__() 14 | 15 | resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION 16 | scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES 17 | sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO 18 | pooler = Pooler( 19 | output_size=(resolution, resolution), 20 | scales=scales, 21 | sampling_ratio=sampling_ratio, 22 | ) 23 | self.pooler = pooler 24 | 25 | input_features = in_channels 26 | layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS 27 | next_feature = input_features 28 | self.blocks = [] 29 | for layer_idx, layer_features in enumerate(layers, 1): 30 | layer_name = "conv_fcn{}".format(layer_idx) 31 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 32 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 33 | nn.init.constant_(module.bias, 0) 34 | self.add_module(layer_name, module) 35 | next_feature = layer_features 36 | self.blocks.append(layer_name) 37 | self.out_channels = layer_features 38 | 39 | def forward(self, x, proposals): 40 | x = self.pooler(x, proposals) 41 | for layer_name in self.blocks: 42 | x = F.relu(getattr(self, layer_name)(x)) 43 | return x 44 | 45 | 46 | def make_roi_keypoint_feature_extractor(cfg, in_channels): 47 | func = registry.ROI_KEYPOINT_FEATURE_EXTRACTORS[ 48 | cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR 49 | ] 50 | return func(cfg, in_channels) 51 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from maskrcnn_benchmark import layers 4 | from maskrcnn_benchmark.modeling import registry 5 | 6 | 7 | @registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor") 8 | class KeypointRCNNPredictor(nn.Module): 9 | def __init__(self, cfg, in_channels): 10 | super(KeypointRCNNPredictor, self).__init__() 11 | input_features = in_channels 12 | num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES 13 | deconv_kernel 
= 4 14 | self.kps_score_lowres = layers.ConvTranspose2d( 15 | input_features, 16 | num_keypoints, 17 | deconv_kernel, 18 | stride=2, 19 | padding=deconv_kernel // 2 - 1, 20 | ) 21 | nn.init.kaiming_normal_( 22 | self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu" 23 | ) 24 | nn.init.constant_(self.kps_score_lowres.bias, 0) 25 | self.up_scale = 2 26 | self.out_channels = num_keypoints 27 | 28 | def forward(self, x): 29 | x = self.kps_score_lowres(x) 30 | x = layers.interpolate( 31 | x, scale_factor=self.up_scale, mode="bilinear", align_corners=False 32 | ) 33 | return x 34 | 35 | 36 | def make_roi_keypoint_predictor(cfg, in_channels): 37 | func = registry.ROI_KEYPOINT_PREDICTOR[cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR] 38 | return func(cfg, in_channels) 39 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MingtaoFu/gliding_vertex/c4470140265140e118725b80a81efe68e44e10af/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | 7 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 8 | from .roi_mask_predictors import make_roi_mask_predictor 9 | from .inference import make_roi_mask_post_processor 10 | from .loss import make_roi_mask_loss_evaluator 11 | 12 | 13 | def keep_only_positive_boxes(boxes): 14 | """ 15 | Given a set of BoxList containing the `labels` field, 16 | return a set of BoxList for which `labels > 0`. 17 | 18 | Arguments: 19 | boxes (list of BoxList) 20 | """ 21 | assert isinstance(boxes, (list, tuple)) 22 | assert isinstance(boxes[0], BoxList) 23 | assert boxes[0].has_field("labels") 24 | positive_boxes = [] 25 | positive_inds = [] 26 | num_boxes = 0 27 | for boxes_per_image in boxes: 28 | labels = boxes_per_image.get_field("labels") 29 | inds_mask = labels > 0 30 | inds = inds_mask.nonzero().squeeze(1) 31 | positive_boxes.append(boxes_per_image[inds]) 32 | positive_inds.append(inds_mask) 33 | return positive_boxes, positive_inds 34 | 35 | 36 | class ROIMaskHead(torch.nn.Module): 37 | def __init__(self, cfg, in_channels): 38 | super(ROIMaskHead, self).__init__() 39 | self.cfg = cfg.clone() 40 | self.feature_extractor = make_roi_mask_feature_extractor(cfg, in_channels) 41 | self.predictor = make_roi_mask_predictor( 42 | cfg, self.feature_extractor.out_channels) 43 | self.post_processor = make_roi_mask_post_processor(cfg) 44 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 45 | 46 | def forward(self, features, proposals, targets=None): 47 | """ 48 | Arguments: 49 | features (list[Tensor]): feature-maps from possibly several levels 50 | proposals (list[BoxList]): proposal boxes 51 | targets (list[BoxList], optional): the ground-truth targets. 52 | 53 | Returns: 54 | x (Tensor): the result of the feature extractor 55 | proposals (list[BoxList]): during training, the original proposals 56 | are returned. 
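# --- Resolution sketch for KeypointRCNNPredictor above (512 input channels and
# 17 keypoints, as in COCO-style configs, are illustrative): the stride-2
# deconv doubles the pooled map, and the bilinear interpolate doubles it again.
import torch
from torch import nn

deconv = nn.ConvTranspose2d(512, 17, 4, stride=2, padding=4 // 2 - 1)
x = torch.randn(1, 512, 14, 14)
assert deconv(x).shape[-2:] == torch.Size([28, 28])   # interpolate then gives 56x56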
During testing, the predicted boxlists are returned 57 | with the `mask` field set 58 | losses (dict[Tensor]): During training, returns the losses for the 59 | head. During testing, returns an empty dict. 60 | """ 61 | 62 | if self.training: 63 | # during training, only focus on positive boxes 64 | all_proposals = proposals 65 | proposals, positive_inds = keep_only_positive_boxes(proposals) 66 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 67 | x = features 68 | x = x[torch.cat(positive_inds, dim=0)] 69 | else: 70 | x = self.feature_extractor(features, proposals) 71 | mask_logits = self.predictor(x) 72 | 73 | if not self.training: 74 | result = self.post_processor(mask_logits, proposals) 75 | return x, result, {} 76 | 77 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 78 | 79 | return x, all_proposals, dict(loss_mask=loss_mask) 80 | 81 | 82 | def build_roi_mask_head(cfg, in_channels): 83 | return ROIMaskHead(cfg, in_channels) 84 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 6 | from maskrcnn_benchmark.modeling import registry 7 | from maskrcnn_benchmark.modeling.poolers import Pooler 8 | from maskrcnn_benchmark.modeling.make_layers import make_conv3x3 9 | 10 | 11 | registry.ROI_MASK_FEATURE_EXTRACTORS.register( 12 | "ResNet50Conv5ROIFeatureExtractor", ResNet50Conv5ROIFeatureExtractor 13 | ) 14 | 15 | 16 | @registry.ROI_MASK_FEATURE_EXTRACTORS.register("MaskRCNNFPNFeatureExtractor") 17 | class MaskRCNNFPNFeatureExtractor(nn.Module): 18 | """ 19 | Heads for FPN for classification 20 | """ 21 | 22 | def __init__(self, cfg, in_channels): 23 | """ 24 | Arguments: 25 | num_classes (int): number of output classes 26 | input_size (int): number of channels of the input once it's flattened 27 | representation_size (int): size of the intermediate representation 28 | """ 29 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 30 | 31 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 32 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 33 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 34 | pooler = Pooler( 35 | output_size=(resolution, resolution), 36 | scales=scales, 37 | sampling_ratio=sampling_ratio, 38 | ) 39 | input_size = in_channels 40 | self.pooler = pooler 41 | 42 | use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN 43 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 44 | dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION 45 | 46 | next_feature = input_size 47 | self.blocks = [] 48 | for layer_idx, layer_features in enumerate(layers, 1): 49 | layer_name = "mask_fcn{}".format(layer_idx) 50 | module = make_conv3x3( 51 | next_feature, layer_features, 52 | dilation=dilation, stride=1, use_gn=use_gn 53 | ) 54 | self.add_module(layer_name, module) 55 | next_feature = layer_features 56 | self.blocks.append(layer_name) 57 | self.out_channels = layer_features 58 | 59 | def forward(self, x, proposals): 60 | x = self.pooler(x, proposals) 61 | 62 | for layer_name in self.blocks: 63 | x = F.relu(getattr(self, layer_name)(x)) 64 | 65 | return x 66 | 67 | 68 | def make_roi_mask_feature_extractor(cfg, in_channels): 69 | func = 
registry.ROI_MASK_FEATURE_EXTRACTORS[ 70 | cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR 71 | ] 72 | return func(cfg, in_channels) 73 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | from maskrcnn_benchmark.layers import ConvTranspose2d 7 | from maskrcnn_benchmark.modeling import registry 8 | 9 | 10 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNC4Predictor") 11 | class MaskRCNNC4Predictor(nn.Module): 12 | def __init__(self, cfg, in_channels): 13 | super(MaskRCNNC4Predictor, self).__init__() 14 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 16 | num_inputs = in_channels 17 | 18 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 19 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 20 | 21 | for name, param in self.named_parameters(): 22 | if "bias" in name: 23 | nn.init.constant_(param, 0) 24 | elif "weight" in name: 25 | # Caffe2 implementation uses MSRAFill, which in fact 26 | # corresponds to kaiming_normal_ in PyTorch 27 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 28 | 29 | def forward(self, x): 30 | x = F.relu(self.conv5_mask(x)) 31 | return self.mask_fcn_logits(x) 32 | 33 | 34 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNConv1x1Predictor") 35 | class MaskRCNNConv1x1Predictor(nn.Module): 36 | def __init__(self, cfg, in_channels): 37 | super(MaskRCNNConv1x1Predictor, self).__init__() 38 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 39 | num_inputs = in_channels 40 | 41 | self.mask_fcn_logits = Conv2d(num_inputs, num_classes, 1, 1, 0) 42 | 43 | for name, param in self.named_parameters(): 44 | if "bias" in name: 45 | nn.init.constant_(param, 0) 46 | elif "weight" in name: 47 | # Caffe2 implementation uses MSRAFill, which in fact 48 | # corresponds to kaiming_normal_ in PyTorch 49 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 50 | 51 | def forward(self, x): 52 | return self.mask_fcn_logits(x) 53 | 54 | 55 | def make_roi_mask_predictor(cfg, in_channels): 56 | func = registry.ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 57 | return func(cfg, in_channels) 58 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/roi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .box_head.box_head import build_roi_box_head 5 | from .box_head.ratio_head import build_roi_ratio_head 6 | from .mask_head.mask_head import build_roi_mask_head 7 | from .keypoint_head.keypoint_head import build_roi_keypoint_head 8 | 9 | 10 | class CombinedROIHeads(torch.nn.ModuleDict): 11 | """ 12 | Combines a set of individual heads (for box prediction or masks) into a single 13 | head. 
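# --- Shape sketch for the mask predictors above (channel counts illustrative):
# MaskRCNNC4Predictor upsamples 2x with the deconv before the 1x1 logits.
import torch
from torch import nn

conv5_mask = nn.ConvTranspose2d(256, 256, 2, 2, 0)
mask_fcn_logits = nn.Conv2d(256, 81, 1, 1, 0)          # 81 = 80 COCO classes + bg
x = torch.randn(8, 256, 14, 14)                        # 8 positive ROIs
out = mask_fcn_logits(torch.relu(conv5_mask(x)))       # -> (8, 81, 28, 28)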
14 | """ 15 | 16 | def __init__(self, cfg, heads): 17 | super(CombinedROIHeads, self).__init__(heads) 18 | self.cfg = cfg.clone() 19 | if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 20 | self.mask.feature_extractor = self.ratio.feature_extractor 21 | if cfg.MODEL.KEYPOINT_ON and cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 22 | self.keypoint.feature_extractor = self.ratio.feature_extractor 23 | 24 | def forward(self, features, proposals, targets=None): 25 | losses = {} 26 | # TODO rename x to roi_box_features, if it doesn't increase memory consumption 27 | x, detections, loss_box = self.box(features, proposals, targets) 28 | losses.update(loss_box) 29 | if self.cfg.MODEL.MASK_ON: 30 | mask_features = features 31 | # optimization: during training, if we share the feature extractor between 32 | # the box and the mask heads, then we can reuse the features already computed 33 | if ( 34 | self.training 35 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 36 | ): 37 | mask_features = x 38 | # During training, self.box() will return the unaltered proposals as "detections" 39 | # this makes the API consistent during training and testing 40 | x, detections, loss_mask = self.mask(mask_features, detections, targets) 41 | losses.update(loss_mask) 42 | 43 | if self.cfg.MODEL.KEYPOINT_ON: 44 | keypoint_features = features 45 | # optimization: during training, if we share the feature extractor between 46 | # the box and the mask heads, then we can reuse the features already computed 47 | if ( 48 | self.training 49 | and self.cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 50 | ): 51 | keypoint_features = x 52 | # During training, self.box() will return the unaltered proposals as "detections" 53 | # this makes the API consistent during training and testing 54 | x, detections, loss_keypoint = self.keypoint(keypoint_features, detections, targets) 55 | losses.update(loss_keypoint) 56 | return x, detections, losses 57 | 58 | 59 | def build_roi_heads(cfg, in_channels): 60 | # individually create the heads, that will be combined together 61 | # afterwards 62 | roi_heads = [] 63 | if cfg.MODEL.RETINANET_ON: 64 | return [] 65 | 66 | if not cfg.MODEL.RPN_ONLY: 67 | # NOTE Mingtao 68 | if cfg.MODEL.RATIO_ON: 69 | roi_heads.append(("box", build_roi_ratio_head(cfg, in_channels))) 70 | else: 71 | roi_heads.append(("box", build_roi_box_head(cfg, in_channels))) 72 | if cfg.MODEL.MASK_ON: 73 | roi_heads.append(("mask", build_roi_mask_head(cfg, in_channels))) 74 | if cfg.MODEL.KEYPOINT_ON: 75 | roi_heads.append(("keypoint", build_roi_keypoint_head(cfg, in_channels))) 76 | 77 | # combine individual heads in a single module 78 | if roi_heads: 79 | roi_heads = CombinedROIHeads(cfg, roi_heads) 80 | 81 | return roi_heads 82 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MingtaoFu/gliding_vertex/c4470140265140e118725b80a81efe68e44e10af/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/retinanet/loss.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains specific functions for computing losses on the RetinaNet 3 | file 4 | """ 5 | 6 | import torch 7 | from torch.nn import functional as F 8 | 9 | from ..utils import concat_box_prediction_layers 10 | 11 | from maskrcnn_benchmark.layers import smooth_l1_loss 12 | from maskrcnn_benchmark.layers import SigmoidFocalLoss 13 | from maskrcnn_benchmark.modeling.matcher import Matcher 14 | from maskrcnn_benchmark.modeling.utils import cat 15 | from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou 16 | from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist 17 | from maskrcnn_benchmark.modeling.rpn.loss import RPNLossComputation 18 | 19 | class RetinaNetLossComputation(RPNLossComputation): 20 | """ 21 | This class computes the RetinaNet loss. 22 | """ 23 | 24 | def __init__(self, proposal_matcher, box_coder, 25 | generate_labels_func, 26 | sigmoid_focal_loss, 27 | bbox_reg_beta=0.11, 28 | regress_norm=1.0): 29 | """ 30 | Arguments: 31 | proposal_matcher (Matcher) 32 | box_coder (BoxCoder) 33 | """ 34 | self.proposal_matcher = proposal_matcher 35 | self.box_coder = box_coder 36 | self.box_cls_loss_func = sigmoid_focal_loss 37 | self.bbox_reg_beta = bbox_reg_beta 38 | self.copied_fields = ['labels'] 39 | self.generate_labels_func = generate_labels_func 40 | self.discard_cases = ['between_thresholds'] 41 | self.regress_norm = regress_norm 42 | 43 | def __call__(self, anchors, box_cls, box_regression, targets): 44 | """ 45 | Arguments: 46 | anchors (list[BoxList]) 47 | box_cls (list[Tensor]) 48 | box_regression (list[Tensor]) 49 | targets (list[BoxList]) 50 | 51 | Returns: 52 | retinanet_cls_loss (Tensor) 53 | retinanet_regression_loss (Tensor 54 | """ 55 | anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors] 56 | labels, regression_targets = self.prepare_targets(anchors, targets) 57 | 58 | N = len(labels) 59 | box_cls, box_regression = \ 60 | concat_box_prediction_layers(box_cls, box_regression) 61 | 62 | labels = torch.cat(labels, dim=0) 63 | regression_targets = torch.cat(regression_targets, dim=0) 64 | pos_inds = torch.nonzero(labels > 0).squeeze(1) 65 | 66 | retinanet_regression_loss = smooth_l1_loss( 67 | box_regression[pos_inds], 68 | regression_targets[pos_inds], 69 | beta=self.bbox_reg_beta, 70 | size_average=False, 71 | ) / (max(1, pos_inds.numel() * self.regress_norm)) 72 | 73 | labels = labels.int() 74 | 75 | retinanet_cls_loss = self.box_cls_loss_func( 76 | box_cls, 77 | labels 78 | ) / (pos_inds.numel() + N) 79 | 80 | return retinanet_cls_loss, retinanet_regression_loss 81 | 82 | 83 | def generate_retinanet_labels(matched_targets): 84 | labels_per_image = matched_targets.get_field("labels") 85 | return labels_per_image 86 | 87 | 88 | def make_retinanet_loss_evaluator(cfg, box_coder): 89 | matcher = Matcher( 90 | cfg.MODEL.RETINANET.FG_IOU_THRESHOLD, 91 | cfg.MODEL.RETINANET.BG_IOU_THRESHOLD, 92 | 
allow_low_quality_matches=True, 93 | ) 94 | sigmoid_focal_loss = SigmoidFocalLoss( 95 | cfg.MODEL.RETINANET.LOSS_GAMMA, 96 | cfg.MODEL.RETINANET.LOSS_ALPHA 97 | ) 98 | 99 | loss_evaluator = RetinaNetLossComputation( 100 | matcher, 101 | box_coder, 102 | generate_retinanet_labels, 103 | sigmoid_focal_loss, 104 | bbox_reg_beta=cfg.MODEL.RETINANET.BBOX_REG_BETA, 105 | regress_norm=cfg.MODEL.RETINANET.BBOX_REG_WEIGHT, 106 | ) 107 | return loss_evaluator 108 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Utility functions manipulating the prediction layers 4 | """ 5 | 6 | from ..utils import cat 7 | 8 | import torch 9 | 10 | def permute_and_flatten(layer, N, A, C, H, W): 11 | layer = layer.view(N, -1, C, H, W) 12 | layer = layer.permute(0, 3, 4, 1, 2) 13 | layer = layer.reshape(N, -1, C) 14 | return layer 15 | 16 | 17 | def concat_box_prediction_layers(box_cls, box_regression): 18 | box_cls_flattened = [] 19 | box_regression_flattened = [] 20 | # for each feature level, permute the outputs to make them be in the 21 | # same format as the labels. Note that the labels are computed for 22 | # all feature levels concatenated, so we keep the same representation 23 | # for the objectness and the box_regression 24 | for box_cls_per_level, box_regression_per_level in zip( 25 | box_cls, box_regression 26 | ): 27 | N, AxC, H, W = box_cls_per_level.shape 28 | Ax4 = box_regression_per_level.shape[1] 29 | A = Ax4 // 4 30 | C = AxC // A 31 | box_cls_per_level = permute_and_flatten( 32 | box_cls_per_level, N, A, C, H, W 33 | ) 34 | box_cls_flattened.append(box_cls_per_level) 35 | 36 | box_regression_per_level = permute_and_flatten( 37 | box_regression_per_level, N, A, 4, H, W 38 | ) 39 | box_regression_flattened.append(box_regression_per_level) 40 | # concatenate on the first dimension (representing the feature levels), to 41 | # take into account the way the labels were generated (with all feature maps 42 | # being concatenated as well) 43 | box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C) 44 | box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4) 45 | return box_cls, box_regression 46 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
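# --- Shape sketch for concat_box_prediction_layers above (sizes illustrative):
# each per-level map (N, A*C, H, W) becomes (N, H*W*A, C), so predictions line
# up with anchors/labels that were concatenated across all levels.
import torch
from maskrcnn_benchmark.modeling.rpn.utils import concat_box_prediction_layers

box_cls = [torch.randn(2, 3 * 1, 10, 10), torch.randn(2, 3 * 1, 5, 5)]   # A=3, C=1
box_reg = [torch.randn(2, 3 * 4, 10, 10), torch.randn(2, 3 * 4, 5, 5)]
cls, reg = concat_box_prediction_layers(box_cls, box_reg)
assert cls.shape == (2 * (100 + 25) * 3, 1) and reg.shape == (750, 4)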
2 | from .build import make_optimizer 3 | from .build import make_lr_scheduler 4 | from .lr_scheduler import WarmupMultiStepLR 5 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .lr_scheduler import WarmupMultiStepLR 5 | 6 | 7 | def make_optimizer(cfg, model): 8 | params = [] 9 | for key, value in model.named_parameters(): 10 | if not value.requires_grad: 11 | continue 12 | lr = cfg.SOLVER.BASE_LR 13 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 14 | if "bias" in key: 15 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 16 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 17 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 18 | 19 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM) 20 | return optimizer 21 | 22 | 23 | def make_lr_scheduler(cfg, optimizer): 24 | return WarmupMultiStepLR( 25 | optimizer, 26 | cfg.SOLVER.STEPS, 27 | cfg.SOLVER.GAMMA, 28 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 29 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 30 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 31 | ) 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | 4 | import torch 5 | 6 | 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 8 | # separating MultiStepLR with WarmupLR 9 | # but the current LRScheduler design doesn't allow it 10 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer, 14 | milestones, 15 | gamma=0.1, 16 | warmup_factor=1.0 / 3, 17 | warmup_iters=500, 18 | warmup_method="linear", 19 | last_epoch=-1, 20 | ): 21 | if not list(milestones) == sorted(milestones): 22 | raise ValueError( 23 | "Milestones should be a list of" 24 | " increasing integers. Got {}".format(milestones) 25 | ) 26 | 
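        # (inserted note) illustrative schedule with hypothetical values: for
        # milestones=(60000, 80000), gamma=0.1, warmup_factor=1/3 and
        # warmup_iters=500, the lr ramps linearly from base_lr/3 to base_lr over
        # the first 500 iterations, then drops to 0.1 * base_lr at iteration
        # 60000 and to 0.01 * base_lr at iteration 80000.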
Got {}", 24 | milestones, 25 | ) 26 | 27 | if warmup_method not in ("constant", "linear"): 28 | raise ValueError( 29 | "Only 'constant' or 'linear' warmup_method accepted" 30 | "got {}".format(warmup_method) 31 | ) 32 | self.milestones = milestones 33 | self.gamma = gamma 34 | self.warmup_factor = warmup_factor 35 | self.warmup_iters = warmup_iters 36 | self.warmup_method = warmup_method 37 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 38 | 39 | def get_lr(self): 40 | warmup_factor = 1 41 | if self.last_epoch < self.warmup_iters: 42 | if self.warmup_method == "constant": 43 | warmup_factor = self.warmup_factor 44 | elif self.warmup_method == "linear": 45 | alpha = float(self.last_epoch) / self.warmup_iters 46 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 47 | return [ 48 | base_lr 49 | * warmup_factor 50 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 51 | for base_lr in self.base_lrs 52 | ] 53 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MingtaoFu/gliding_vertex/c4470140265140e118725b80a81efe68e44e10af/maskrcnn_benchmark/structures/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 
37 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 38 | tensors = [tensors] 39 | 40 | if isinstance(tensors, ImageList): 41 | return tensors 42 | elif isinstance(tensors, torch.Tensor): 43 | # single tensor shape can be inferred 44 | if tensors.dim() == 3: 45 | tensors = tensors[None] 46 | assert tensors.dim() == 4 47 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 48 | return ImageList(tensors, image_sizes) 49 | elif isinstance(tensors, (tuple, list)): 50 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 51 | 52 | # TODO Ideally, just remove this and let the model handle arbitrary 53 | # input sizes 54 | if size_divisible > 0: 55 | import math 56 | 57 | stride = size_divisible 58 | max_size = list(max_size) 59 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 60 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 61 | max_size = tuple(max_size) 62 | 63 | batch_shape = (len(tensors),) + max_size 64 | batched_imgs = tensors[0].new(*batch_shape).zero_() 65 | for img, pad_img in zip(tensors, batched_imgs): 66 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 67 | 68 | image_sizes = [im.shape[-2:] for im in tensors] 69 | 70 | return ImageList(batched_imgs, image_sizes) 71 | else: 72 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 73 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MingtaoFu/gliding_vertex/c4470140265140e118725b80a81efe68e44e10af/maskrcnn_benchmark/utils/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import PIL 3 | 4 | from torch.utils.collect_env import get_pretty_env_info 5 | 6 | 7 | def get_pil_version(): 8 | return "\n Pillow ({})".format(PIL.__version__) 9 | 10 | 11 | def collect_env_info(): 12 | env_str = get_pretty_env_info() 13 | env_str += get_pil_version() 14 | return env_str 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/comm.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains primitives for multi-gpu communication. 3 | This is useful when doing distributed training. 
4 | """ 5 | 6 | import pickle 7 | import time 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | 13 | def get_world_size(): 14 | if not dist.is_available(): 15 | return 1 16 | if not dist.is_initialized(): 17 | return 1 18 | return dist.get_world_size() 19 | 20 | 21 | def get_rank(): 22 | if not dist.is_available(): 23 | return 0 24 | if not dist.is_initialized(): 25 | return 0 26 | return dist.get_rank() 27 | 28 | 29 | def is_main_process(): 30 | return get_rank() == 0 31 | 32 | 33 | def synchronize(): 34 | """ 35 | Helper function to synchronize (barrier) among all processes when 36 | using distributed training 37 | """ 38 | if not dist.is_available(): 39 | return 40 | if not dist.is_initialized(): 41 | return 42 | world_size = dist.get_world_size() 43 | if world_size == 1: 44 | return 45 | dist.barrier() 46 | 47 | 48 | def all_gather(data): 49 | """ 50 | Run all_gather on arbitrary picklable data (not necessarily tensors) 51 | Args: 52 | data: any picklable object 53 | Returns: 54 | list[data]: list of data gathered from each rank 55 | """ 56 | world_size = get_world_size() 57 | if world_size == 1: 58 | return [data] 59 | 60 | # serialized to a Tensor 61 | buffer = pickle.dumps(data) 62 | storage = torch.ByteStorage.from_buffer(buffer) 63 | tensor = torch.ByteTensor(storage).to("cuda") 64 | 65 | # obtain Tensor size of each rank 66 | local_size = torch.LongTensor([tensor.numel()]).to("cuda") 67 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)] 68 | dist.all_gather(size_list, local_size) 69 | size_list = [int(size.item()) for size in size_list] 70 | max_size = max(size_list) 71 | 72 | # receiving Tensor from all ranks 73 | # we pad the tensor because torch all_gather does not support 74 | # gathering tensors of different shapes 75 | tensor_list = [] 76 | for _ in size_list: 77 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 78 | if local_size != max_size: 79 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 80 | tensor = torch.cat((tensor, padding), dim=0) 81 | dist.all_gather(tensor_list, tensor) 82 | 83 | data_list = [] 84 | for size, tensor in zip(size_list, tensor_list): 85 | buffer = tensor.cpu().numpy().tobytes()[:size] 86 | data_list.append(pickle.loads(buffer)) 87 | 88 | return data_list 89 | 90 | 91 | def reduce_dict(input_dict, average=True): 92 | """ 93 | Args: 94 | input_dict (dict): all the values will be reduced 95 | average (bool): whether to do average or sum 96 | Reduce the values in the dictionary from all processes so that process with rank 97 | 0 has the averaged results. Returns a dict with the same fields as 98 | input_dict, after reduction. 
99 | """ 100 | world_size = get_world_size() 101 | if world_size < 2: 102 | return input_dict 103 | with torch.no_grad(): 104 | names = [] 105 | values = [] 106 | # sort the keys so that they are consistent across processes 107 | for k in sorted(input_dict.keys()): 108 | names.append(k) 109 | values.append(input_dict[k]) 110 | values = torch.stack(values, dim=0) 111 | dist.reduce(values, dst=0) 112 | if dist.get_rank() == 0 and average: 113 | # only main process gets accumulated, so only divide by 114 | # world_size in this case 115 | values /= world_size 116 | reduced_dict = {k: v for k, v in zip(names, values)} 117 | return reduced_dict 118 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for cv2 utility functions and maintaining version compatibility 3 | between 3.x and 4.x 4 | """ 5 | import cv2 6 | 7 | 8 | def findContours(*args, **kwargs): 9 | """ 10 | Wraps cv2.findContours to maintain compatiblity between versions 11 | 3 and 4 12 | 13 | Returns: 14 | contours, hierarchy 15 | """ 16 | if cv2.__version__.startswith('4'): 17 | contours, hierarchy = cv2.findContours(*args, **kwargs) 18 | elif cv2.__version__.startswith('3'): 19 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 20 | else: 21 | raise AssertionError( 22 | 'cv2 must be either version 3 or 4 to call this method') 23 | 24 | return contours, hierarchy 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/cython_bbox.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_overlaps( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ua = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) + 70 | box_area - iw * ih 71 | ) 72 | overlaps[n, k] = iw * ih / ua 73 | return overlaps 74 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | 4 | from maskrcnn_benchmark.utils.imports import import_file 5 | 6 | 7 | def setup_environment(): 8 | """Perform environment setup work. The default setup is a no-op, but this 9 | function allows the user to specify a Python source file that performs 10 | custom setup work that may be necessary to their computing environment. 11 | """ 12 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 13 | if custom_module_path: 14 | setup_custom_environment(custom_module_path) 15 | else: 16 | # The default setup is a no-op 17 | pass 18 | 19 | 20 | def setup_custom_environment(custom_module_path): 21 | """Load custom environment setup from a Python source file and run the setup 22 | function. 23 | """ 24 | module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) 25 | assert hasattr(module, "setup_environment") and callable( 26 | module.setup_environment 27 | ), ( 28 | "Custom environment module defined in {} does not have the " 29 | "required callable attribute 'setup_environment'." 30 | ).format( 31 | custom_module_path 32 | ) 33 | module.setup_environment() 34 | 35 | 36 | # Force environment setup when this module is imported 37 | setup_environment() 38 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/imports.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
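# Usage sketch (illustrative): load a module from an arbitrary file path, e.g.
# the dataset catalog that a config points to:
#   paths_catalog = import_file(
#       "maskrcnn_benchmark.config.paths_catalog", cfg.PATHS_CATALOG, True
#   )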
2 | import torch 3 | 4 | if torch._six.PY3: 5 | import importlib 6 | import importlib.util 7 | import sys 8 | 9 | 10 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 11 | def import_file(module_name, file_path, make_importable=False): 12 | spec = importlib.util.spec_from_file_location(module_name, file_path) 13 | module = importlib.util.module_from_spec(spec) 14 | spec.loader.exec_module(module) 15 | if make_importable: 16 | sys.modules[module_name] = module 17 | return module 18 | else: 19 | import imp 20 | 21 | def import_file(module_name, file_path, make_importable=None): 22 | module = imp.load_source(module_name, file_path) 23 | return module 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | 7 | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | ch = logging.StreamHandler(stream=sys.stdout) 14 | ch.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 16 | ch.setFormatter(formatter) 17 | logger.addHandler(ch) 18 | 19 | if save_dir: 20 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 21 | fh.setLevel(logging.DEBUG) 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | 25 | return logger 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | 7 | 8 | class SmoothedValue(object): 9 | """Track a series of values and provide access to smoothed values over a 10 | window or the global series average. 
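    For example (illustrative): with the default window_size=20, the median and
    avg properties look at only the 20 most recent updates, while global_avg is
    total / count over every update since construction.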
11 | """ 12 | 13 | def __init__(self, window_size=20): 14 | self.deque = deque(maxlen=window_size) 15 | self.series = [] 16 | self.total = 0.0 17 | self.count = 0 18 | 19 | def update(self, value): 20 | self.deque.append(value) 21 | self.series.append(value) 22 | self.count += 1 23 | self.total += value 24 | 25 | @property 26 | def median(self): 27 | d = torch.tensor(list(self.deque)) 28 | return d.median().item() 29 | 30 | @property 31 | def avg(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.mean().item() 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger(object): 41 | def __init__(self, delimiter="\t"): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | if attr in self.__dict__: 56 | return self.__dict__[attr] 57 | raise AttributeError("'{}' object has no attribute '{}'".format( 58 | type(self).__name__, attr)) 59 | 60 | def __str__(self): 61 | loss_str = [] 62 | for name, meter in self.meters.items(): 63 | loss_str.append( 64 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 65 | ) 66 | return self.delimiter.join(loss_str) 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import json 4 | import logging 5 | import os 6 | from .comm import is_main_process 7 | 8 | 9 | def mkdir(path): 10 | try: 11 | os.makedirs(path) 12 | except OSError as e: 13 | if e.errno != errno.EEXIST: 14 | raise 15 | 16 | 17 | def save_labels(dataset_list, output_dir): 18 | if is_main_process(): 19 | logger = logging.getLogger(__name__) 20 | 21 | ids_to_labels = {} 22 | for dataset in dataset_list: 23 | if hasattr(dataset, 'categories'): 24 | ids_to_labels.update(dataset.categories) 25 | else: 26 | logger.warning("Dataset [{}] has no categories attribute, labels.json file won't be created".format( 27 | dataset.__class__.__name__)) 28 | 29 | if ids_to_labels: 30 | labels_file = os.path.join(output_dir, 'labels.json') 31 | logger.info("Saving labels mapping into {}".format(labels_file)) 32 | with open(labels_file, 'w') as f: 33 | json.dump(ids_to_labels, f, indent=2) 34 | 35 | 36 | def save_config(cfg, path): 37 | if is_main_process(): 38 | with open(path, 'w') as f: 39 | f.write(cfg.dump()) 40 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_serialization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | import logging 4 | 5 | import torch 6 | 7 | from maskrcnn_benchmark.utils.imports import import_file 8 | 9 | 10 | def align_and_update_state_dicts(model_state_dict, loaded_state_dict): 11 | """ 12 | Strategy: suppose that the models that we will create will have prefixes appended 13 | to each of its keys, for example due to an extra level of nesting that the original 14 | pre-trained weights from ImageNet won't contain. 
For example, model.state_dict() 15 | might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains 16 | res2.conv1.weight. We thus want to match both parameters together. 17 | For that, for each model weight we look among all the loaded keys for one 18 | that is a suffix of the current weight name, and use it if that's the case. 19 | If multiple matches exist, we take the one with the longest 20 | matching name. For example, for the same model as before, the pretrained 21 | weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, 22 | we want to match backbone[0].body.conv1.weight to conv1.weight, and 23 | backbone[0].body.res2.conv1.weight to res2.conv1.weight. 24 | """ 25 | current_keys = sorted(list(model_state_dict.keys())) 26 | loaded_keys = sorted(list(loaded_state_dict.keys())) 27 | # get a matrix of string matches, where each (i, j) entry correspond to the size of the 28 | # loaded_key string, if it matches 29 | match_matrix = [ 30 | len(j) if i.endswith(j) else 0 for i in current_keys for j in loaded_keys 31 | ] 32 | match_matrix = torch.as_tensor(match_matrix).view( 33 | len(current_keys), len(loaded_keys) 34 | ) 35 | max_match_size, idxs = match_matrix.max(1) 36 | # remove indices that correspond to no-match 37 | idxs[max_match_size == 0] = -1 38 | 39 | # used for logging 40 | max_size = max([len(key) for key in current_keys]) if current_keys else 1 41 | max_size_loaded = max([len(key) for key in loaded_keys]) if loaded_keys else 1 42 | log_str_template = "{: <{}} loaded from {: <{}} of shape {}" 43 | logger = logging.getLogger(__name__) 44 | for idx_new, idx_old in enumerate(idxs.tolist()): 45 | if idx_old == -1: 46 | continue 47 | key = current_keys[idx_new] 48 | key_old = loaded_keys[idx_old] 49 | model_state_dict[key] = loaded_state_dict[key_old] 50 | logger.info( 51 | log_str_template.format( 52 | key, 53 | max_size, 54 | key_old, 55 | max_size_loaded, 56 | tuple(loaded_state_dict[key_old].shape), 57 | ) 58 | ) 59 | 60 | 61 | def strip_prefix_if_present(state_dict, prefix): 62 | keys = sorted(state_dict.keys()) 63 | if not all(key.startswith(prefix) for key in keys): 64 | return state_dict 65 | stripped_state_dict = OrderedDict() 66 | for key, value in state_dict.items(): 67 | stripped_state_dict[key.replace(prefix, "")] = value 68 | return stripped_state_dict 69 | 70 | 71 | def load_state_dict(model, loaded_state_dict): 72 | model_state_dict = model.state_dict() 73 | # if the state_dict comes from a model that was wrapped in a 74 | # DataParallel or DistributedDataParallel during serialization, 75 | # remove the "module" prefix before performing the matching 76 | loaded_state_dict = strip_prefix_if_present(loaded_state_dict, prefix="module.") 77 | align_and_update_state_dicts(model_state_dict, loaded_state_dict) 78 | 79 | # use strict loading 80 | model.load_state_dict(model_state_dict) 81 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import os 3 | import sys 4 | 5 | try: 6 | from torch.hub import _download_url_to_file 7 | from torch.hub import urlparse 8 | from torch.hub import HASH_REGEX 9 | except ImportError: 10 | from torch.utils.model_zoo import _download_url_to_file 11 | from torch.utils.model_zoo import urlparse 12 | from torch.utils.model_zoo import HASH_REGEX 13 | 14 | from maskrcnn_benchmark.utils.comm import is_main_process 15 | from maskrcnn_benchmark.utils.comm import synchronize 16 | 17 | 18 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py 19 | # but with a few improvements and modifications 20 | def cache_url(url, model_dir=None, progress=True): 21 | r"""Loads the Torch serialized object at the given URL. 22 | If the object is already present in `model_dir`, it's deserialized and 23 | returned. The filename part of the URL should follow the naming convention 24 | ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more 25 | digits of the SHA256 hash of the contents of the file. The hash is used to 26 | ensure unique names and to verify the contents of the file. 27 | The default value of `model_dir` is ``$TORCH_HOME/models`` where 28 | ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be 29 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 30 | Args: 31 | url (string): URL of the object to download 32 | model_dir (string, optional): directory in which to save the object 33 | progress (bool, optional): whether or not to display a progress bar to stderr 34 | Example: 35 | >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') 36 | """ 37 | if model_dir is None: 38 | torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch")) 39 | model_dir = os.getenv("TORCH_MODEL_ZOO", os.path.join(torch_home, "models")) 40 | if not os.path.exists(model_dir): 41 | os.makedirs(model_dir) 42 | parts = urlparse(url) 43 | filename = os.path.basename(parts.path) 44 | if filename == "model_final.pkl": 45 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename 46 | # so make the full path the filename by replacing / with _ 47 | filename = parts.path.replace("/", "_") 48 | cached_file = os.path.join(model_dir, filename) 49 | if not os.path.exists(cached_file) and is_main_process(): 50 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 51 | hash_prefix = HASH_REGEX.search(filename) 52 | if hash_prefix is not None: 53 | hash_prefix = hash_prefix.group(1) 54 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, 55 | # which matches the hash PyTorch uses. So we skip the hash matching 56 | # if the hash_prefix is less than 6 characters 
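        # (inserted note) e.g. for "resnet18-5c106cde.pth" the regex captures
        # hash_prefix "5c106cde", which _download_url_to_file then verifies
        # against the SHA256 of the downloaded file.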
57 | if len(hash_prefix) < 6: 58 | hash_prefix = None 59 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 60 | synchronize() 61 | return cached_file 62 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/poly_nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MingtaoFu/gliding_vertex/c4470140265140e118725b80a81efe68e44e10af/maskrcnn_benchmark/utils/poly_nms/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/poly_nms/poly_nms.py: -------------------------------------------------------------------------------- 1 | import maskrcnn_benchmark.utils.poly_nms.polyiou as polyiou 2 | import numpy as np 3 | 4 | # NOTE QIMENG calculate rotated box IoU 5 | def poly_nms(dets, thresh): 6 | scores = dets[:, 8] 7 | polys = [] 8 | areas = [] 9 | for i in range(len(dets)): 10 | tm_polygon = polyiou.VectorDouble([dets[i][0], dets[i][1], 11 | dets[i][2], dets[i][3], 12 | dets[i][4], dets[i][5], 13 | dets[i][6], dets[i][7]]) 14 | polys.append(tm_polygon) 15 | order = scores.argsort()[::-1] 16 | 17 | keep = [] 18 | while order.size > 0: 19 | ovr = [] 20 | i = order[0] 21 | keep.append(i) 22 | for j in range(order.size - 1): 23 | iou = polyiou.iou_poly(polys[i], polys[order[j + 1]]) 24 | ovr.append(iou) 25 | ovr = np.array(ovr) 26 | inds = np.where(ovr <= thresh)[0] 27 | order = order[inds + 1] 28 | return keep 29 | 30 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/poly_nms/polyiou.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include <iostream> 3 | #include <fstream> 4 | #include <cmath> 5 | #include <algorithm> 6 | #include <vector> 7 | using namespace std; 8 | #define maxn 510 9 | const double eps=1E-8; 10 | int sig(double d){ 11 | return(d>eps)-(d<-eps); 12 | } 13 | struct Point{ 14 | double x,y; Point(){} 15 | Point(double x,double y):x(x),y(y){} 16 | bool operator==(const Point&p)const{ 17 | return sig(x-p.x)==0&&sig(y-p.y)==0; 18 | } 19 | }; 20 | double cross(Point o,Point a,Point b){ // cross product of OA and OB 21 | return(a.x-o.x)*(b.y-o.y)-(b.x-o.x)*(a.y-o.y); 22 | } 23 | double area(Point* ps,int n){ 24 | ps[n]=ps[0]; 25 | double res=0; 26 | for(int i=0;i<n;i++){ 27 | res+=ps[i].x*ps[i+1].y-ps[i].y*ps[i+1].x; 28 | } 29 | return res/2.0; 30 | } 31 | int lineCross(Point a,Point b,Point c,Point d,Point&p){ 32 | double s1,s2; 33 | s1=cross(a,b,c); 34 | s2=cross(a,b,d); 35 | if(sig(s1)==0&&sig(s2)==0) return 2; 36 | if(sig(s2-s1)==0) return 0; 37 | p.x=(c.x*s2-d.x*s1)/(s2-s1); 38 | p.y=(c.y*s2-d.y*s1)/(s2-s1); 39 | return 1; 40 | } 41 | // polygon clipping: cut polygon p with the directed line ab, keeping the 42 | // part on the left of vector (a,b); the result is stored in place 43 | // (if it degenerates to a single point, it is still returned, with n==1) 44 | void polygon_cut(Point*p,int&n,Point a,Point b){ 45 | static Point pp[maxn]; 46 | int m=0;p[n]=p[0]; 47 | for(int i=0;i<n;i++){ 48 | if(sig(cross(a,b,p[i]))>0) pp[m++]=p[i]; 49 | if(sig(cross(a,b,p[i]))!=sig(cross(a,b,p[i+1]))) 50 | lineCross(a,b,p[i],p[i+1],pp[m++]); 51 | } 52 | n=0; 53 | for(int i=0;i<m;i++) 54 | if(!i||!(pp[i]==pp[i-1])) 55 | p[n++]=pp[i]; 56 | while(n>1&&p[n-1]==p[0])n--; 57 | } 58 | //--------------- separator -----------------// 59 | // returns the signed intersection area of triangles oab and ocd, where o is the origin 60 | double intersectArea(Point a,Point b,Point c,Point d){ 61 | Point o(0,0); 62 | int s1=sig(cross(o,a,b)); 63 | int s2=sig(cross(o,c,d)); 64 | if(s1==0||s2==0)return 0.0;// degenerate, area is 0 65 | if(s1==-1) swap(a,b); 66 | if(s2==-1) swap(c,d); 67 | Point p[10]={o,a,b}; 68 | int n=3; 69 | polygon_cut(p,n,o,c); 70 | polygon_cut(p,n,c,d); 71 | polygon_cut(p,n,d,o); 72 | double res=fabs(area(p,n)); 73 | if(s1*s2==-1) res=-res;return res; 74 | } 75 | // intersection area of two polygons 76 | double intersectArea(Point*ps1,int n1,Point*ps2,int n2){ 77 | if(area(ps1,n1)<0) reverse(ps1,ps1+n1); 78 | if(area(ps2,n2)<0) reverse(ps2,ps2+n2); 79 | ps1[n1]=ps1[0]; 80 | ps2[n2]=ps2[0]; 81 | double res=0; 82 | for(int i=0;i<n1;i++){ 83 | for(int j=0;j<n2;j++){ 84 | res+=intersectArea(ps1[i],ps1[i+1],ps2[j],ps2[j+1]); 85 | } 86 | } 87 | return res; // assumes the result is positive 88 | } 89 | 90 | 91 | 92 | 93 | double iou_poly(vector<double> p, vector<double> q) { 94 | Point ps1[maxn],ps2[maxn]; 95 | int n1 = 4; 96 | int n2 = 4; 97 | for (int i = 0; i < 4; i++) { 98 | ps1[i].x = p[i * 2]; 99 | ps1[i].y = p[i * 2 + 1]; 100 | 
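        // (inserted note) p and q are flattened quadrilaterals laid out as
        // [x1, y1, x2, y2, x3, y3, x4, y4]; the lines above unpack p into ps1
        // and the lines below unpack q into ps2 before intersecting the two.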
101 | ps2[i].x = q[i * 2]; 102 | ps2[i].y = q[i * 2 + 1]; 103 | } 104 | double inter_area = intersectArea(ps1, n1, ps2, n2); 105 | double union_area = fabs(area(ps1, n1)) + fabs(area(ps2, n2)) - inter_area; 106 | double iou = inter_area / union_area; 107 | 108 | 109 | // cout << "inter_area:" << inter_area << endl; 110 | // cout << "union_area:" << union_area << endl; 111 | // cout << "iou:" << iou << endl; 112 | 113 | return iou; 114 | } 115 | // 116 | //int main(){ 117 | // double p[8] = {0, 0, 1, 0, 1, 1, 0, 1}; 118 | // double q[8] = {0.5, 0.5, 1.5, 0.5, 1.5, 1.5, 0.5, 1.5}; 119 | // vector<double> P(p, p + 8); 120 | // vector<double> Q(q, q + 8); 121 | // iou_poly(P, Q); 122 | // return 0; 123 | //} 124 | 125 | //int main(){ 126 | // double p[8] = {0, 0, 1, 0, 1, 1, 0, 1}; 127 | // double q[8] = {0.5, 0.5, 1.5, 0.5, 1.5, 1.5, 0.5, 1.5}; 128 | // iou_poly(p, q); 129 | // return 0; 130 | //} 131 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/poly_nms/polyiou.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by dingjian on 18-2-3. 3 | // 4 | 5 | #ifndef POLYIOU_POLYIOU_H 6 | #define POLYIOU_POLYIOU_H 7 | 8 | #include <vector> 9 | double iou_poly(std::vector<double> p, std::vector<double> q); 10 | #endif //POLYIOU_POLYIOU_H 11 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/poly_nms/polyiou.i: -------------------------------------------------------------------------------- 1 | %module polyiou 2 | %include "std_vector.i" 3 | 4 | namespace std { 5 | %template(VectorDouble) vector<double>; 6 | }; 7 | 8 | %{ 9 | #define SWIG_FILE_WITH_INIT 10 | #include <iostream> 11 | #include <fstream> 12 | #include <cmath> 13 | #include <algorithm> 14 | 15 | #include "polyiou.h" 16 | %} 17 | 18 | %include "polyiou.h" 19 | 20 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/poly_nms/setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | setup.py file for SWIG example 3 | """ 4 | from distutils.core import setup, Extension 5 | import numpy 6 | 7 | polyiou_module = Extension('_polyiou', 8 | sources=['polyiou_wrap.cxx', 'polyiou.cpp'], 9 | ) 10 | setup(name = 'polyiou', 11 | version = '0.1', 12 | author = "SWIG Docs", 13 | description = """Simple swig example from docs""", 14 | ext_modules = [polyiou_module], 15 | py_modules = ["polyiou"], 16 | ) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | def _register_generic(module_dict, module_name, module): 5 | assert module_name not in module_dict 6 | module_dict[module_name] = module 7 | 8 | 9 | class Registry(dict): 10 | ''' 11 | A helper class for managing registered modules; it extends a dictionary 12 | and provides a register function. 13 | 14 | E.g. creating a registry: 15 | some_registry = Registry({"default": default_module}) 16 | 17 | There are two ways of registering new modules: 18 | 1): the normal way is just calling the register function: 19 | def foo(): 20 | ... 21 | some_registry.register("foo_module", foo) 22 | 2): use it as a decorator when declaring the module: 23 | @some_registry.register("foo_module") 24 | @some_registry.register("foo_module_nickname") 25 | def foo(): 26 | ... 
27 | 28 | Access to a module is just like using a dictionary, e.g.: 29 | f = some_registry["foo_module"] 30 | ''' 31 | def __init__(self, *args, **kwargs): 32 | super(Registry, self).__init__(*args, **kwargs) 33 | 34 | def register(self, module_name, module=None): 35 | # used as function call 36 | if module is not None: 37 | _register_generic(self, module_name, module) 38 | return 39 | 40 | # used as decorator 41 | def register_fn(fn): 42 | _register_generic(self, module_name, fn) 43 | return fn 44 | 45 | return register_fn 46 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | import time 5 | import datetime 6 | 7 | 8 | class Timer(object): 9 | def __init__(self): 10 | self.reset() 11 | 12 | @property 13 | def average_time(self): 14 | return self.total_time / self.calls if self.calls > 0 else 0.0 15 | 16 | def tic(self): 17 | # using time.time instead of time.clock because time.clock 18 | # does not normalize for multithreading 19 | self.start_time = time.time() 20 | 21 | def toc(self, average=True): 22 | self.add(time.time() - self.start_time) 23 | if average: 24 | return self.average_time 25 | else: 26 | return self.diff 27 | 28 | def add(self, time_diff): 29 | self.diff = time_diff 30 | self.total_time += self.diff 31 | self.calls += 1 32 | 33 | def reset(self): 34 | self.total_time = 0.0 35 | self.calls = 0 36 | self.start_time = 0.0 37 | self.diff = 0.0 38 | 39 | def avg_time_str(self): 40 | time_str = str(datetime.timedelta(seconds=self.average_time)) 41 | return time_str 42 | 43 | 44 | def get_time_str(time_diff): 45 | time_str = str(datetime.timedelta(seconds=time_diff)) 46 | return time_str 47 | -------------------------------------------------------------------------------- /prepare.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from DOTA_devkit.ImgSplit_multi_process import splitbase 4 | from DOTA_devkit.SplitOnlyImage_multi_process import splitbase as splitbase_onlyimg 5 | from txt2json import collect_unaug_dataset, convert 6 | 7 | with open( "config.json" ) as config_f: 8 | CONFIG = json.load( config_f ) 9 | O_ROOT = CONFIG["original_root_dir"] 10 | T_ROOT = CONFIG["target_root_dir"] 11 | SETs = CONFIG["sets"] 12 | 13 | # split 14 | for SET in SETs: 15 | if SET["only_img"]: 16 | if not os.path.exists( os.path.join( T_ROOT, SET["name"] + "_cut", "images" ) ): 17 | os.makedirs( os.path.join( T_ROOT, SET["name"] + "_cut", "images" ) ) 18 | split = splitbase_onlyimg( os.path.join( O_ROOT, SET["name"], "images" ), os.path.join( T_ROOT, SET["name"] + "_cut", "images" ), gap=SET["gap"], subsize=1024, num_process=8, padding=False ) 19 | else: 20 | split = splitbase( os.path.join( O_ROOT, SET["name"] ), os.path.join( T_ROOT, SET["name"] + "_cut" ), gap=SET["gap"], subsize=1024, num_process=8, padding=False ) 21 | split.splitdata( 1 ) 22 | split.splitdata( 0.5 ) 23 | 24 | # class balancing of the training set 25 | # NOTE This may not be optimal, and you can choose a different strategy. 
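# (inserted note) The lines below oversample images containing rare DOTA
# categories by duplicating their image ids, so the sampler sees them more
# often; the exact slice sizes (e.g. [:526]) look hand-tuned for this split
# and would need re-tuning on a different dataset.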
26 | img_dic = collect_unaug_dataset( os.path.join( T_ROOT, "trainval_cut", "labelTxt" ) ) 27 | img_dic["storage-tank"] = img_dic["storage-tank"] + img_dic["storage-tank"][:526] 28 | img_dic["baseball-diamond"] = img_dic["baseball-diamond"] * 2 + img_dic["baseball-diamond"][:202] 29 | img_dic["ground-track-field"] = img_dic["ground-track-field"] + img_dic["ground-track-field"][:575] 30 | img_dic["swimming-pool"] = img_dic["swimming-pool"] * 2 + img_dic["swimming-pool"][:104] 31 | img_dic["soccer-ball-field"] = img_dic["soccer-ball-field"] + img_dic["soccer-ball-field"][:962] 32 | img_dic["roundabout"] = img_dic["roundabout"] + img_dic["roundabout"][:711] 33 | img_dic["tennis-court"] = img_dic["tennis-court"] + img_dic["tennis-court"][:655] 34 | 35 | img_dic["basketball-court"] = img_dic["basketball-court"] * 4 36 | img_dic["helicopter"] = img_dic["helicopter"] * 8 37 | 38 | convert( img_dic, os.path.join( T_ROOT, "trainval_cut" ), os.path.join( T_ROOT, "trainval_cut", "trainval_cut.json" ) ) 39 | img_dic_test = {"all": [i[:-4] for i in os.listdir( os.path.join( T_ROOT, "test_cut", "images" ) )]} 40 | convert( img_dic_test, os.path.join( T_ROOT, "test_cut" ), os.path.join( T_ROOT, "test_cut", "test_cut.json" ) ) 41 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ninja 2 | yacs 3 | cython 4 | matplotlib 5 | tqdm 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "maskrcnn_benchmark._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="maskrcnn_benchmark", 61 | version="0.1", 62 | author="fmassa", 63 | url="https://github.com/facebookresearch/maskrcnn-benchmark", 64 | 
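    # (inserted note, not in the original) once dependencies are installed,
    # the C++/CUDA extensions declared above are usually built in place with:
    #   python setup.py build develop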
description="object detection in pytorch", 65 | packages=find_packages(exclude=("configs", "tests",)), 66 | # install_requires=requirements, 67 | ext_modules=get_extensions(), 68 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 69 | ) 70 | -------------------------------------------------------------------------------- /tools/cityscapes/instances2dict_with_polygons.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Convert instances from png files to a dictionary 4 | # This file was created according to https://github.com/facebookresearch/Detectron/issues/111 5 | 6 | from __future__ import print_function, absolute_import, division 7 | import os, sys 8 | 9 | sys.path.append( os.path.normpath( os.path.join( os.path.dirname( __file__ ) , '..' , 'helpers' ) ) ) 10 | from csHelpers import * 11 | 12 | # Cityscapes imports 13 | from cityscapesscripts.evaluation.instance import * 14 | from cityscapesscripts.helpers.csHelpers import * 15 | import cv2 16 | from maskrcnn_benchmark.utils import cv2_util 17 | 18 | 19 | def instances2dict_with_polygons(imageFileList, verbose=False): 20 | imgCount = 0 21 | instanceDict = {} 22 | 23 | if not isinstance(imageFileList, list): 24 | imageFileList = [imageFileList] 25 | 26 | if verbose: 27 | print("Processing {} images...".format(len(imageFileList))) 28 | 29 | for imageFileName in imageFileList: 30 | # Load image 31 | img = Image.open(imageFileName) 32 | 33 | # Image as numpy array 34 | imgNp = np.array(img) 35 | 36 | # Initialize label categories 37 | instances = {} 38 | for label in labels: 39 | instances[label.name] = [] 40 | 41 | # Loop through all instance ids in instance image 42 | for instanceId in np.unique(imgNp): 43 | if instanceId < 1000: 44 | continue 45 | instanceObj = Instance(imgNp, instanceId) 46 | instanceObj_dict = instanceObj.toDict() 47 | 48 | #instances[id2label[instanceObj.labelID].name].append(instanceObj.toDict()) 49 | if id2label[instanceObj.labelID].hasInstances: 50 | mask = (imgNp == instanceId).astype(np.uint8) 51 | contour, hier = cv2_util.findContours( 52 | mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 53 | 54 | polygons = [c.reshape(-1).tolist() for c in contour] 55 | instanceObj_dict['contours'] = polygons 56 | 57 | instances[id2label[instanceObj.labelID].name].append(instanceObj_dict) 58 | 59 | imgKey = os.path.abspath(imageFileName) 60 | instanceDict[imgKey] = instances 61 | imgCount += 1 62 | 63 | if verbose: 64 | print("\rImages Processed: {}".format(imgCount), end=' ') 65 | sys.stdout.flush() 66 | 67 | if verbose: 68 | print("") 69 | 70 | return instanceDict 71 | 72 | def main(argv): 73 | fileList = [] 74 | if (len(argv) > 2): 75 | for arg in argv: 76 | if ("png" in arg): 77 | fileList.append(arg) 78 | instances2dict_with_polygons(fileList, True) 79 | 80 | if __name__ == "__main__": 81 | main(sys.argv[1:]) 82 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
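# Example invocation (illustrative; hypothetical paths, but --config-file and
# the trailing opts are the flags parsed below):
#   python tools/test_net.py --config-file configs/glide/dota.yaml \
#       MODEL.WEIGHT /path/to/model_final.pth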
2 | # Set up custom environment before nearly anything else is imported 3 | # NOTE: this should be the first import (do not reorder) 4 | from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip 5 | 6 | import argparse 7 | import os 8 | 9 | import torch 10 | from maskrcnn_benchmark.config import cfg 11 | from maskrcnn_benchmark.data import make_data_loader 12 | from maskrcnn_benchmark.engine.inference import inference 13 | from maskrcnn_benchmark.modeling.detector import build_detection_model 14 | from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer 15 | from maskrcnn_benchmark.utils.collect_env import collect_env_info 16 | from maskrcnn_benchmark.utils.comm import synchronize, get_rank 17 | from maskrcnn_benchmark.utils.logger import setup_logger 18 | from maskrcnn_benchmark.utils.miscellaneous import mkdir 19 | 20 | # Check if we can enable mixed-precision via apex.amp 21 | try: 22 | from apex import amp 23 | except ImportError: 24 | raise ImportError('Use APEX for mixed precision via apex.amp') 25 | 26 | 27 | def main(): 28 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference") 29 | parser.add_argument( 30 | "--config-file", 31 | default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml", 32 | metavar="FILE", 33 | help="path to config file", 34 | ) 35 | parser.add_argument("--local_rank", type=int, default=0) 36 | parser.add_argument( 37 | "--ckpt", 38 | help="The path to the checkpoint for test, default is the latest checkpoint.", 39 | default=None, 40 | ) 41 | parser.add_argument( 42 | "opts", 43 | help="Modify config options using the command-line", 44 | default=None, 45 | nargs=argparse.REMAINDER, 46 | ) 47 | 48 | args = parser.parse_args() 49 | 50 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 51 | distributed = num_gpus > 1 52 | 53 | if distributed: 54 | torch.cuda.set_device(args.local_rank) 55 | torch.distributed.init_process_group( 56 | backend="nccl", init_method="env://" 57 | ) 58 | synchronize() 59 | 60 | cfg.merge_from_file(args.config_file) 61 | cfg.merge_from_list(args.opts) 62 | cfg.freeze() 63 | 64 | save_dir = "" 65 | logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank()) 66 | logger.info("Using {} GPUs".format(num_gpus)) 67 | logger.info(cfg) 68 | 69 | logger.info("Collecting env info (might take some time)") 70 | logger.info("\n" + collect_env_info()) 71 | 72 | model = build_detection_model(cfg) 73 | model.to(cfg.MODEL.DEVICE) 74 | 75 | # Initialize mixed-precision if necessary 76 | use_mixed_precision = cfg.DTYPE == 'float16' 77 | amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE) 78 | 79 | output_dir = cfg.OUTPUT_DIR 80 | checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir) 81 | ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt 82 | _ = checkpointer.load(ckpt, use_latest=args.ckpt is None) 83 | 84 | iou_types = ("bbox",) 85 | if cfg.MODEL.MASK_ON: 86 | iou_types = iou_types + ("segm",) 87 | if cfg.MODEL.KEYPOINT_ON: 88 | iou_types = iou_types + ("keypoints",) 89 | output_folders = [None] * len(cfg.DATASETS.TEST) 90 | dataset_names = cfg.DATASETS.TEST 91 | if cfg.OUTPUT_DIR: 92 | for idx, dataset_name in enumerate(dataset_names): 93 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) 94 | mkdir(output_folder) 95 | output_folders[idx] = output_folder 96 | data_loaders_val = make_data_loader(cfg, is_train=False, 
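                                        # (inserted note) with is_train=False this
                                        # returns one loader per dataset listed in
                                        # cfg.DATASETS.TEST, zipped with
                                        # output_folders below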
is_distributed=distributed) 97 | for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val): 98 | inference( 99 | model, 100 | data_loader_val, 101 | dataset_name=dataset_name, 102 | iou_types=iou_types, 103 | box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY, 104 | device=cfg.MODEL.DEVICE, 105 | expected_results=cfg.TEST.EXPECTED_RESULTS, 106 | expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, 107 | output_folder=output_folder, 108 | ) 109 | synchronize() 110 | 111 | 112 | if __name__ == "__main__": 113 | main() 114 | -------------------------------------------------------------------------------- /val_result.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MingtaoFu/gliding_vertex/c4470140265140e118725b80a81efe68e44e10af/val_result.zip --------------------------------------------------------------------------------