├── README.md ├── client.py ├── client_pipeline.py ├── configs ├── Base-RCNN-C4.yaml ├── Base-RCNN-DilatedC5.yaml ├── Base-RCNN-FPN.yaml ├── Base-RetinaNet.yaml ├── COCO-Detection │ ├── fast_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_101_C4_3x.yaml │ ├── faster_rcnn_R_101_DC5_3x.yaml │ ├── faster_rcnn_R_101_FPN_3x.yaml │ ├── faster_rcnn_R_50_C4_1x.yaml │ ├── faster_rcnn_R_50_C4_3x.yaml │ ├── faster_rcnn_R_50_DC5_1x.yaml │ ├── faster_rcnn_R_50_DC5_3x.yaml │ ├── faster_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_50_FPN_3x.yaml │ ├── faster_rcnn_X_101_32x8d_FPN_3x.yaml │ ├── fcos_R_50_FPN_1x.py │ ├── retinanet_R_101_FPN_3x.yaml │ ├── retinanet_R_50_FPN_1x.py │ ├── retinanet_R_50_FPN_1x.yaml │ ├── retinanet_R_50_FPN_3x.yaml │ ├── rpn_R_50_C4_1x.yaml │ └── rpn_R_50_FPN_1x.yaml ├── COCO-InstanceSegmentation │ ├── mask_rcnn_R_101_C4_3x.yaml │ ├── mask_rcnn_R_101_DC5_3x.yaml │ ├── mask_rcnn_R_101_FPN_3x.yaml │ ├── mask_rcnn_R_50_C4_1x.py │ ├── mask_rcnn_R_50_C4_1x.yaml │ ├── mask_rcnn_R_50_C4_3x.yaml │ ├── mask_rcnn_R_50_DC5_1x.yaml │ ├── mask_rcnn_R_50_DC5_3x.yaml │ ├── mask_rcnn_R_50_FPN_1x.py │ ├── mask_rcnn_R_50_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_1x_giou.yaml │ ├── mask_rcnn_R_50_FPN_3x.yaml │ ├── mask_rcnn_X_101_32x8d_FPN_3x.yaml │ ├── mask_rcnn_regnetx_4gf_dds_fpn_1x.py │ └── mask_rcnn_regnety_4gf_dds_fpn_1x.py ├── COCO-Keypoints │ ├── Base-Keypoint-RCNN-FPN.yaml │ ├── keypoint_rcnn_R_101_FPN_3x.yaml │ ├── keypoint_rcnn_R_50_FPN_1x.py │ ├── keypoint_rcnn_R_50_FPN_1x.yaml │ ├── keypoint_rcnn_R_50_FPN_3x.yaml │ └── keypoint_rcnn_X_101_32x8d_FPN_3x.yaml ├── COCO-PanopticSegmentation │ ├── Base-Panoptic-FPN.yaml │ ├── panoptic_fpn_R_101_3x.yaml │ ├── panoptic_fpn_R_50_1x.py │ ├── panoptic_fpn_R_50_1x.yaml │ └── panoptic_fpn_R_50_3x.yaml ├── Cityscapes │ └── mask_rcnn_R_50_FPN.yaml ├── Detectron1-Comparisons │ ├── README.md │ ├── faster_rcnn_R_50_FPN_noaug_1x.yaml │ ├── keypoint_rcnn_R_50_FPN_1x.yaml │ └── mask_rcnn_R_50_FPN_noaug_1x.yaml ├── LVISv0.5-InstanceSegmentation │ ├── mask_rcnn_R_101_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_1x.yaml │ └── mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── LVISv1-InstanceSegmentation │ ├── mask_rcnn_R_101_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_1x.yaml │ └── mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── Misc │ ├── cascade_mask_rcnn_R_50_FPN_1x.yaml │ ├── cascade_mask_rcnn_R_50_FPN_3x.yaml │ ├── cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml │ ├── mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml │ ├── mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml │ ├── mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml │ ├── mask_rcnn_R_50_FPN_3x_gn.yaml │ ├── mask_rcnn_R_50_FPN_3x_syncbn.yaml │ ├── mmdet_mask_rcnn_R_50_FPN_1x.py │ ├── panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml │ ├── scratch_mask_rcnn_R_50_FPN_3x_gn.yaml │ ├── scratch_mask_rcnn_R_50_FPN_9x_gn.yaml │ ├── scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml │ ├── semantic_R_50_FPN_1x.yaml │ └── torchvision_imagenet_R_50.py ├── PascalVOC-Detection │ ├── faster_rcnn_R_50_C4.yaml │ └── faster_rcnn_R_50_FPN.yaml ├── common │ ├── README.md │ ├── coco_schedule.py │ ├── data │ │ ├── coco.py │ │ ├── coco_keypoint.py │ │ └── coco_panoptic_separated.py │ ├── models │ │ ├── cascade_rcnn.py │ │ ├── fcos.py │ │ ├── keypoint_rcnn_fpn.py │ │ ├── mask_rcnn_c4.py │ │ ├── mask_rcnn_fpn.py │ │ ├── panoptic_fpn.py │ │ └── retinanet.py │ ├── optim.py │ └── train.py ├── new_baselines │ ├── mask_rcnn_R_101_FPN_100ep_LSJ.py │ ├── mask_rcnn_R_101_FPN_200ep_LSJ.py │ ├── mask_rcnn_R_101_FPN_400ep_LSJ.py │ ├── mask_rcnn_R_50_FPN_100ep_LSJ.py │ ├── mask_rcnn_R_50_FPN_200ep_LSJ.py │ ├── mask_rcnn_R_50_FPN_400ep_LSJ.py │ ├── mask_rcnn_R_50_FPN_50ep_LSJ.py │ ├── mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py │ ├── mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py │ ├── mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py │ ├── mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py │ ├── mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py │ └── mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py └── quick_schedules │ ├── README.md │ ├── cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── cascade_mask_rcnn_R_50_FPN_instant_test.yaml │ ├── fast_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── fast_rcnn_R_50_FPN_instant_test.yaml │ ├── keypoint_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── keypoint_rcnn_R_50_FPN_instant_test.yaml │ ├── keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml │ ├── keypoint_rcnn_R_50_FPN_training_acc_test.yaml │ ├── mask_rcnn_R_50_C4_GCV_instant_test.yaml │ ├── mask_rcnn_R_50_C4_inference_acc_test.yaml │ ├── mask_rcnn_R_50_C4_instant_test.yaml │ ├── mask_rcnn_R_50_C4_training_acc_test.yaml │ ├── mask_rcnn_R_50_DC5_inference_acc_test.yaml │ ├── mask_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── mask_rcnn_R_50_FPN_instant_test.yaml │ ├── mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml │ ├── mask_rcnn_R_50_FPN_training_acc_test.yaml │ ├── panoptic_fpn_R_50_inference_acc_test.yaml │ ├── panoptic_fpn_R_50_instant_test.yaml │ ├── panoptic_fpn_R_50_training_acc_test.yaml │ ├── retinanet_R_50_FPN_inference_acc_test.yaml │ ├── retinanet_R_50_FPN_instant_test.yaml │ ├── rpn_R_50_FPN_inference_acc_test.yaml │ ├── rpn_R_50_FPN_instant_test.yaml │ ├── semantic_R_50_FPN_inference_acc_test.yaml │ ├── semantic_R_50_FPN_instant_test.yaml │ └── semantic_R_50_FPN_training_acc_test.yaml ├── export_model.py ├── models ├── infer_pipeline │ └── config.pbtxt ├── mask_rcnn │ └── config.pbtxt ├── postprocess │ └── config.pbtxt └── preprocess │ └── config.pbtxt ├── python_models ├── postprocess.py ├── preprocess.py └── requirements.txt └── run_triton.sh /README.md: -------------------------------------------------------------------------------- 1 | # deploy-detectron2-with-triton 2 | This is accompanying code for the [post](https://tintn.github.io/deploy-detectron2-with-triton/) "Deploy Detectron2 models with Triton" 3 | -------------------------------------------------------------------------------- /client.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from concurrent.futures import ThreadPoolExecutor, wait 3 | import time 4 | import tritonclient.http as httpclient 5 | from tqdm import tqdm 6 | from PIL import Image 7 | import numpy as np 8 | 9 | 10 | def test_infer(req_id, image_file, model_name, print_output=False): 11 | img = np.array(Image.open(image_file)) 12 | img = np.ascontiguousarray(img.transpose(2, 0, 1)) 13 | # Define model's inputs 14 | inputs = [] 15 | inputs.append(httpclient.InferInput('image__0', img.shape, "UINT8")) 16 | inputs[0].set_data_from_numpy(img) 17 | # Define model's outputs 18 | outputs = [] 19 | outputs.append(httpclient.InferRequestedOutput('bboxes__0')) 20 | outputs.append(httpclient.InferRequestedOutput('classes__1')) 21 | outputs.append(httpclient.InferRequestedOutput('masks__2')) 22 | outputs.append(httpclient.InferRequestedOutput('scores__3')) 23 | outputs.append(httpclient.InferRequestedOutput('shape__4')) 24 | # Send request to Triton server 25 | triton_client = httpclient.InferenceServerClient( 26 | url="localhost:8000", verbose=False) 27 | results = triton_client.infer(model_name, inputs=inputs, outputs=outputs) 28 | response_info = results.get_response() 29 | outputs = {} 30 | for output_info in response_info['outputs']: 31 | output_name = output_info['name'] 32 | outputs[output_name] = results.as_numpy(output_name) 33 | 34 | if print_output: 35 | print(req_id, outputs) 36 | 37 | 38 | def parse_args(): 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('--image', required=True) 41 | parser.add_argument('--model', required=True) 42 | parser.add_argument('--mode', default='sequential', choices=['sequential', 'concurrent']) 43 | parser.add_argument('--num-reqs', default='1') 44 | parser.add_argument('--print-output', action='store_true') 45 | return parser.parse_args() 46 | 47 | 48 | if __name__ == '__main__': 49 | args = parse_args() 50 | image_file = args.image 51 | model_name = args.model 52 | mode = args.mode 53 | n_reqs = int(args.num_reqs) 54 | 55 | if mode == 'sequential': 56 | for i in tqdm(range(n_reqs)): 57 | test_infer(i, image_file, model_name, args.print_output) 58 | elif mode == 'concurrent': 59 | s = time.time() 60 | with ThreadPoolExecutor(max_workers=10) as executor: 61 | futures = [ 62 | executor.submit(test_infer, 63 | i, 64 | image_file, 65 | model_name, 66 | args.print_output) 67 | for i in range(n_reqs) 68 | ] 69 | wait(futures) 70 | for f in futures: 71 | f.results() 72 | e = time.time() 73 | print('FPS:', n_reqs/(e - s)) 74 | -------------------------------------------------------------------------------- /client_pipeline.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from concurrent.futures import ThreadPoolExecutor, wait 3 | import time 4 | import tritonclient.http as httpclient 5 | from tqdm import tqdm 6 | from PIL import Image 7 | import numpy as np 8 | 9 | 10 | def test_infer(req_id, image_file, model_name, print_output=False): 11 | with open(image_file, 'rb') as fi: 12 | image_bytes = fi.read() 13 | image_bytes = np.array([image_bytes], dtype=np.bytes_) 14 | # Define model's inputs 15 | inputs = [] 16 | inputs.append(httpclient.InferInput('IMAGE_BYTES', image_bytes.shape, "BYTES")) 17 | inputs[0].set_data_from_numpy(image_bytes) 18 | # Define model's outputs 19 | outputs = [] 20 | outputs.append(httpclient.InferRequestedOutput('BBOXES')) 21 | outputs.append(httpclient.InferRequestedOutput('CLASSES')) 22 | outputs.append(httpclient.InferRequestedOutput('MASKS')) 23 | outputs.append(httpclient.InferRequestedOutput('SCORES')) 24 | # Send request to Triton server 25 | triton_client = httpclient.InferenceServerClient( 26 | url="localhost:8000", verbose=False) 27 | results = triton_client.infer(model_name, inputs=inputs, outputs=outputs) 28 | response_info = results.get_response() 29 | outputs = {} 30 | for output_info in response_info['outputs']: 31 | output_name = output_info['name'] 32 | outputs[output_name] = results.as_numpy(output_name) 33 | 34 | if print_output: 35 | print(req_id, outputs) 36 | 37 | 38 | def parse_args(): 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('--image', required=True) 41 | parser.add_argument('--model', required=True) 42 | parser.add_argument('--mode', default='sequential', choices=['sequential', 'concurrent']) 43 | parser.add_argument('--num-reqs', default='1') 44 | parser.add_argument('--print-output', action='store_true') 45 | return parser.parse_args() 46 | 47 | 48 | if __name__ == '__main__': 49 | args = parse_args() 50 | image_file = args.image 51 | model_name = args.model 52 | mode = args.mode 53 | n_reqs = int(args.num_reqs) 54 | 55 | if mode == 'sequential': 56 | for i in tqdm(range(n_reqs)): 57 | test_infer(i, image_file, model_name, args.print_output) 58 | elif mode == 'concurrent': 59 | s = time.time() 60 | with ThreadPoolExecutor(max_workers=10) as executor: 61 | futures = [ 62 | executor.submit(test_infer, 63 | i, 64 | image_file, 65 | model_name, 66 | args.print_output) 67 | for i in range(n_reqs) 68 | ] 69 | wait(futures) 70 | for f in futures: 71 | f.results() 72 | e = time.time() 73 | print(n_reqs/(e - s)) 74 | -------------------------------------------------------------------------------- /configs/Base-RCNN-C4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "Res5ROIHeads" 8 | DATASETS: 9 | TRAIN: ("coco_2017_train",) 10 | TEST: ("coco_2017_val",) 11 | SOLVER: 12 | IMS_PER_BATCH: 16 13 | BASE_LR: 0.02 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | INPUT: 17 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 18 | VERSION: 2 19 | -------------------------------------------------------------------------------- /configs/Base-RCNN-DilatedC5.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RESNETS: 4 | OUT_FEATURES: ["res5"] 5 | RES5_DILATION: 2 6 | RPN: 7 | IN_FEATURES: ["res5"] 8 | PRE_NMS_TOPK_TEST: 6000 9 | POST_NMS_TOPK_TEST: 1000 10 | ROI_HEADS: 11 | NAME: "StandardROIHeads" 12 | IN_FEATURES: ["res5"] 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_FC: 2 16 | POOLER_RESOLUTION: 7 17 | ROI_MASK_HEAD: 18 | NAME: "MaskRCNNConvUpsampleHead" 19 | NUM_CONV: 4 20 | POOLER_RESOLUTION: 14 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 31 | VERSION: 2 32 | -------------------------------------------------------------------------------- /configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /configs/Base-RetinaNet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | BACKBONE: 4 | NAME: "build_retinanet_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | ANCHOR_GENERATOR: 8 | SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] 9 | FPN: 10 | IN_FEATURES: ["res3", "res4", "res5"] 11 | RETINANET: 12 | IOU_THRESHOLDS: [0.4, 0.5] 13 | IOU_LABELS: [0, -1, 1] 14 | SMOOTH_L1_LOSS_BETA: 0.0 15 | DATASETS: 16 | TRAIN: ("coco_2017_train",) 17 | TEST: ("coco_2017_val",) 18 | SOLVER: 19 | IMS_PER_BATCH: 16 20 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 21 | STEPS: (60000, 80000) 22 | MAX_ITER: 90000 23 | INPUT: 24 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 25 | VERSION: 2 26 | -------------------------------------------------------------------------------- /configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | LOAD_PROPOSALS: True 6 | RESNETS: 7 | DEPTH: 50 8 | PROPOSAL_GENERATOR: 9 | NAME: "PrecomputedProposals" 10 | DATASETS: 11 | TRAIN: ("coco_2017_train",) 12 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) 13 | TEST: ("coco_2017_val",) 14 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 15 | DATALOADER: 16 | # proposals are part of the dataset_dicts, and take a lot of RAM 17 | NUM_WORKERS: 2 18 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: False 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /configs/COCO-Detection/fcos_R_50_FPN_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco import dataloader 4 | from ..common.models.fcos import model 5 | from ..common.train import train 6 | 7 | dataloader.train.mapper.use_instance_mask = False 8 | optimizer.lr = 0.01 9 | 10 | model.backbone.bottom_up.freeze_at = 2 11 | train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 12 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_50_FPN_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco import dataloader 4 | from ..common.models.retinanet import model 5 | from ..common.train import train 6 | 7 | dataloader.train.mapper.use_instance_mask = False 8 | model.backbone.bottom_up.freeze_at = 2 9 | optimizer.lr = 0.01 10 | 11 | train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 12 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Detection/rpn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | PRE_NMS_TOPK_TEST: 12000 10 | POST_NMS_TOPK_TEST: 2000 11 | -------------------------------------------------------------------------------- /configs/COCO-Detection/rpn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | POST_NMS_TOPK_TEST: 2000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.train import train 2 | from ..common.optim import SGD as optimizer 3 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 4 | from ..common.data.coco import dataloader 5 | from ..common.models.mask_rcnn_c4 import model 6 | 7 | model.backbone.freeze_at = 2 8 | train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco import dataloader 4 | from ..common.models.mask_rcnn_fpn import model 5 | from ..common.train import train 6 | 7 | model.backbone.bottom_up.freeze_at = 2 8 | train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | RPN: 8 | BBOX_REG_LOSS_TYPE: "giou" 9 | BBOX_REG_LOSS_WEIGHT: 2.0 10 | ROI_BOX_HEAD: 11 | BBOX_REG_LOSS_TYPE: "giou" 12 | BBOX_REG_LOSS_WEIGHT: 10.0 13 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco import dataloader 4 | from ..common.models.mask_rcnn_fpn import model 5 | from ..common.train import train 6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.modeling.backbone import RegNet 9 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock 10 | 11 | 12 | # Replace default ResNet with RegNetX-4GF from the DDS paper. Config source: 13 | # https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml#L4-L9 # noqa 14 | model.backbone.bottom_up = L(RegNet)( 15 | stem_class=SimpleStem, 16 | stem_width=32, 17 | block_class=ResBottleneckBlock, 18 | depth=23, 19 | w_a=38.65, 20 | w_0=96, 21 | w_m=2.43, 22 | group_width=40, 23 | freeze_at=2, 24 | norm="FrozenBN", 25 | out_features=["s1", "s2", "s3", "s4"], 26 | ) 27 | model.pixel_std = [57.375, 57.120, 58.395] 28 | 29 | optimizer.weight_decay = 5e-5 30 | train.init_checkpoint = ( 31 | "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth" 32 | ) 33 | # RegNets benefit from enabling cudnn benchmark mode 34 | train.cudnn_benchmark = True 35 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco import dataloader 4 | from ..common.models.mask_rcnn_fpn import model 5 | from ..common.train import train 6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.modeling.backbone import RegNet 9 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock 10 | 11 | 12 | # Replace default ResNet with RegNetY-4GF from the DDS paper. Config source: 13 | # https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml#L4-L10 # noqa 14 | model.backbone.bottom_up = L(RegNet)( 15 | stem_class=SimpleStem, 16 | stem_width=32, 17 | block_class=ResBottleneckBlock, 18 | depth=22, 19 | w_a=31.41, 20 | w_0=96, 21 | w_m=2.24, 22 | group_width=64, 23 | se_ratio=0.25, 24 | freeze_at=2, 25 | norm="FrozenBN", 26 | out_features=["s1", "s2", "s3", "s4"], 27 | ) 28 | model.pixel_std = [57.375, 57.120, 58.395] 29 | 30 | optimizer.weight_decay = 5e-5 31 | train.init_checkpoint = ( 32 | "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth" 33 | ) 34 | # RegNets benefit from enabling cudnn benchmark mode 35 | train.cudnn_benchmark = True 36 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | KEYPOINT_ON: True 4 | ROI_HEADS: 5 | NUM_CLASSES: 1 6 | ROI_BOX_HEAD: 7 | SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss 8 | RPN: 9 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. 10 | # 1000 proposals per-image is found to hurt box AP. 11 | # Therefore we increase it to 1500 per-image. 12 | POST_NMS_TOPK_TRAIN: 1500 13 | DATASETS: 14 | TRAIN: ("keypoints_coco_2017_train",) 15 | TEST: ("keypoints_coco_2017_val",) 16 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco_keypoint import dataloader 4 | from ..common.models.keypoint_rcnn_fpn import model 5 | from ..common.train import train 6 | 7 | model.backbone.bottom_up.freeze_at = 2 8 | train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (210000, 250000) 12 | MAX_ITER: 270000 13 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | MASK_ON: True 5 | SEM_SEG_HEAD: 6 | LOSS_WEIGHT: 0.5 7 | DATASETS: 8 | TRAIN: ("coco_2017_train_panoptic_separated",) 9 | TEST: ("coco_2017_val_panoptic_separated",) 10 | DATALOADER: 11 | FILTER_EMPTY_ANNOTATIONS: False 12 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco_panoptic_separated import dataloader 4 | from ..common.models.panoptic_fpn import model 5 | from ..common.train import train 6 | 7 | model.backbone.bottom_up.freeze_at = 2 8 | train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/Cityscapes/mask_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | # For better, more stable performance initialize from COCO 5 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" 6 | MASK_ON: True 7 | ROI_HEADS: 8 | NUM_CLASSES: 8 9 | # This is similar to the setting used in Mask R-CNN paper, Appendix A 10 | # But there are some differences, e.g., we did not initialize the output 11 | # layer using the corresponding classes from COCO 12 | INPUT: 13 | MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) 14 | MIN_SIZE_TRAIN_SAMPLING: "choice" 15 | MIN_SIZE_TEST: 1024 16 | MAX_SIZE_TRAIN: 2048 17 | MAX_SIZE_TEST: 2048 18 | DATASETS: 19 | TRAIN: ("cityscapes_fine_instance_seg_train",) 20 | TEST: ("cityscapes_fine_instance_seg_val",) 21 | SOLVER: 22 | BASE_LR: 0.01 23 | STEPS: (18000,) 24 | MAX_ITER: 24000 25 | IMS_PER_BATCH: 8 26 | TEST: 27 | EVAL_PERIOD: 8000 28 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/README.md: -------------------------------------------------------------------------------- 1 | 2 | Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron. 3 | 4 | The differences in implementation details are shared in 5 | [Compatibility with Other Libraries](../../docs/notes/compatibility.md). 6 | 7 | The differences in model zoo's experimental settings include: 8 | * Use scale augmentation during training. This improves AP with lower training cost. 9 | * Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may 10 | affect other AP. 11 | * Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP. 12 | * Use `ROIAlignV2`. This does not significantly affect AP. 13 | 14 | In this directory, we provide a few configs that __do not__ have the above changes. 15 | They mimic Detectron's behavior as close as possible, 16 | and provide a fair comparison of accuracy and speed against Detectron. 17 | 18 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 |
Namelr
sched
train
time
(s/iter)
inference
time
(s/im)
train
mem
(GB)
box
AP
mask
AP
kp.
AP
model iddownload
Faster R-CNN1x0.2190.0383.136.9137781054model | metrics
Keypoint R-CNN1x0.3130.0715.053.164.2137781195model | metrics
Mask R-CNN1x0.2730.0433.437.834.9137781281model | metrics
74 | 75 | ## Comparisons: 76 | 77 | * Faster R-CNN: Detectron's AP is 36.7, similar to ours. 78 | * Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron's 79 | [bug](https://github.com/facebookresearch/Detectron/issues/459) lead to a drop in box AP, and can be 80 | compensated back by some parameter tuning. 81 | * Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to more correct implementation. 82 | See [this article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) for details. 83 | 84 | For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html). 85 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | # Detectron1 uses smooth L1 loss with some magic beta values. 8 | # The defaults are changed to L1 loss in Detectron2. 9 | RPN: 10 | SMOOTH_L1_BETA: 0.1111 11 | ROI_BOX_HEAD: 12 | SMOOTH_L1_BETA: 1.0 13 | POOLER_SAMPLING_RATIO: 2 14 | POOLER_TYPE: "ROIAlign" 15 | INPUT: 16 | # no scale augmentation 17 | MIN_SIZE_TRAIN: (800, ) 18 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1 9 | ROI_KEYPOINT_HEAD: 10 | POOLER_RESOLUTION: 14 11 | POOLER_SAMPLING_RATIO: 2 12 | POOLER_TYPE: "ROIAlign" 13 | # Detectron1 uses smooth L1 loss with some magic beta values. 14 | # The defaults are changed to L1 loss in Detectron2. 15 | ROI_BOX_HEAD: 16 | SMOOTH_L1_BETA: 1.0 17 | POOLER_SAMPLING_RATIO: 2 18 | POOLER_TYPE: "ROIAlign" 19 | RPN: 20 | SMOOTH_L1_BETA: 0.1111 21 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 22 | # 1000 proposals per-image is found to hurt box AP. 23 | # Therefore we increase it to 1500 per-image. 24 | POST_NMS_TOPK_TRAIN: 1500 25 | DATASETS: 26 | TRAIN: ("keypoints_coco_2017_train",) 27 | TEST: ("keypoints_coco_2017_val",) 28 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | # Detectron1 uses smooth L1 loss with some magic beta values. 8 | # The defaults are changed to L1 loss in Detectron2. 9 | RPN: 10 | SMOOTH_L1_BETA: 0.1111 11 | ROI_BOX_HEAD: 12 | SMOOTH_L1_BETA: 1.0 13 | POOLER_SAMPLING_RATIO: 2 14 | POOLER_TYPE: "ROIAlign" 15 | ROI_MASK_HEAD: 16 | POOLER_SAMPLING_RATIO: 2 17 | POOLER_TYPE: "ROIAlign" 18 | INPUT: 19 | # no scale augmentation 20 | MIN_SIZE_TRAIN: (800, ) 21 | -------------------------------------------------------------------------------- /configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 1230 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v0.5_train",) 14 | TEST: ("lvis_v0.5_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | DATALOADER: 18 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 19 | REPEAT_THRESHOLD: 0.001 20 | -------------------------------------------------------------------------------- /configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1230 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v0.5_train",) 14 | TEST: ("lvis_v0.5_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | DATALOADER: 18 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 19 | REPEAT_THRESHOLD: 0.001 20 | -------------------------------------------------------------------------------- /configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | ROI_HEADS: 12 | NUM_CLASSES: 1230 13 | SCORE_THRESH_TEST: 0.0001 14 | INPUT: 15 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 16 | DATASETS: 17 | TRAIN: ("lvis_v0.5_train",) 18 | TEST: ("lvis_v0.5_val",) 19 | TEST: 20 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 21 | DATALOADER: 22 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 23 | REPEAT_THRESHOLD: 0.001 24 | -------------------------------------------------------------------------------- /configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 1203 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v1_train",) 14 | TEST: ("lvis_v1_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | SOLVER: 18 | STEPS: (120000, 160000) 19 | MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs 20 | DATALOADER: 21 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 22 | REPEAT_THRESHOLD: 0.001 23 | -------------------------------------------------------------------------------- /configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1203 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v1_train",) 14 | TEST: ("lvis_v1_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | SOLVER: 18 | STEPS: (120000, 160000) 19 | MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs 20 | DATALOADER: 21 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 22 | REPEAT_THRESHOLD: 0.001 23 | -------------------------------------------------------------------------------- /configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | ROI_HEADS: 12 | NUM_CLASSES: 1203 13 | SCORE_THRESH_TEST: 0.0001 14 | INPUT: 15 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 16 | DATASETS: 17 | TRAIN: ("lvis_v1_train",) 18 | TEST: ("lvis_v1_val",) 19 | SOLVER: 20 | STEPS: (120000, 160000) 21 | MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs 22 | TEST: 23 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 24 | DATALOADER: 25 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 26 | REPEAT_THRESHOLD: 0.001 27 | -------------------------------------------------------------------------------- /configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NAME: CascadeROIHeads 9 | ROI_BOX_HEAD: 10 | CLS_AGNOSTIC_BBOX_REG: True 11 | RPN: 12 | POST_NMS_TOPK_TRAIN: 2000 13 | -------------------------------------------------------------------------------- /configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NAME: CascadeROIHeads 9 | ROI_BOX_HEAD: 10 | CLS_AGNOSTIC_BBOX_REG: True 11 | RPN: 12 | POST_NMS_TOPK_TRAIN: 2000 13 | SOLVER: 14 | STEPS: (210000, 250000) 15 | MAX_ITER: 270000 16 | -------------------------------------------------------------------------------- /configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 152 10 | DEFORM_ON_PER_STAGE: [False, True, True, True] 11 | ROI_HEADS: 12 | NAME: "CascadeROIHeads" 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_CONV: 4 16 | NUM_FC: 1 17 | NORM: "GN" 18 | CLS_AGNOSTIC_BBOX_REG: True 19 | ROI_MASK_HEAD: 20 | NUM_CONV: 8 21 | NORM: "GN" 22 | RPN: 23 | POST_NMS_TOPK_TRAIN: 2000 24 | SOLVER: 25 | IMS_PER_BATCH: 128 26 | STEPS: (35000, 45000) 27 | MAX_ITER: 50000 28 | BASE_LR: 0.16 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 864) 31 | MIN_SIZE_TRAIN_SAMPLING: "range" 32 | MAX_SIZE_TRAIN: 1440 33 | CROP: 34 | ENABLED: True 35 | TEST: 36 | EVAL_PERIOD: 2500 37 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_BOX_HEAD: 8 | CLS_AGNOSTIC_BBOX_REG: True 9 | ROI_MASK_HEAD: 10 | CLS_AGNOSTIC_MASK: True 11 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 8 | DEFORM_MODULATED: False 9 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 8 | DEFORM_MODULATED: False 9 | SOLVER: 10 | STEPS: (210000, 250000) 11 | MAX_ITER: 270000 12 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | NORM: "GN" 8 | STRIDE_IN_1X1: False 9 | FPN: 10 | NORM: "GN" 11 | ROI_BOX_HEAD: 12 | NAME: "FastRCNNConvFCHead" 13 | NUM_CONV: 4 14 | NUM_FC: 1 15 | NORM: "GN" 16 | ROI_MASK_HEAD: 17 | NORM: "GN" 18 | SOLVER: 19 | # 3x schedule 20 | STEPS: (210000, 250000) 21 | MAX_ITER: 270000 22 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | NORM: "SyncBN" 8 | STRIDE_IN_1X1: True 9 | FPN: 10 | NORM: "SyncBN" 11 | ROI_BOX_HEAD: 12 | NAME: "FastRCNNConvFCHead" 13 | NUM_CONV: 4 14 | NUM_FC: 1 15 | NORM: "SyncBN" 16 | ROI_MASK_HEAD: 17 | NORM: "SyncBN" 18 | SOLVER: 19 | # 3x schedule 20 | STEPS: (210000, 250000) 21 | MAX_ITER: 270000 22 | TEST: 23 | PRECISE_BN: 24 | ENABLED: True 25 | -------------------------------------------------------------------------------- /configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py: -------------------------------------------------------------------------------- 1 | # An example config to train a mmdetection model using detectron2. 2 | 3 | from ..common.data.coco import dataloader 4 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 5 | from ..common.optim import SGD as optimizer 6 | from ..common.train import train 7 | 8 | from detectron2.modeling.mmdet_wrapper import MMDetDetector 9 | from detectron2.config import LazyCall as L 10 | 11 | model = L(MMDetDetector)( 12 | detector=dict( 13 | type="MaskRCNN", 14 | pretrained="torchvision://resnet50", 15 | backbone=dict( 16 | type="ResNet", 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type="BN", requires_grad=True), 22 | norm_eval=True, 23 | style="pytorch", 24 | ), 25 | neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), 26 | rpn_head=dict( 27 | type="RPNHead", 28 | in_channels=256, 29 | feat_channels=256, 30 | anchor_generator=dict( 31 | type="AnchorGenerator", 32 | scales=[8], 33 | ratios=[0.5, 1.0, 2.0], 34 | strides=[4, 8, 16, 32, 64], 35 | ), 36 | bbox_coder=dict( 37 | type="DeltaXYWHBBoxCoder", 38 | target_means=[0.0, 0.0, 0.0, 0.0], 39 | target_stds=[1.0, 1.0, 1.0, 1.0], 40 | ), 41 | loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), 42 | loss_bbox=dict(type="L1Loss", loss_weight=1.0), 43 | ), 44 | roi_head=dict( 45 | type="StandardRoIHead", 46 | bbox_roi_extractor=dict( 47 | type="SingleRoIExtractor", 48 | roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), 49 | out_channels=256, 50 | featmap_strides=[4, 8, 16, 32], 51 | ), 52 | bbox_head=dict( 53 | type="Shared2FCBBoxHead", 54 | in_channels=256, 55 | fc_out_channels=1024, 56 | roi_feat_size=7, 57 | num_classes=80, 58 | bbox_coder=dict( 59 | type="DeltaXYWHBBoxCoder", 60 | target_means=[0.0, 0.0, 0.0, 0.0], 61 | target_stds=[0.1, 0.1, 0.2, 0.2], 62 | ), 63 | reg_class_agnostic=False, 64 | loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), 65 | loss_bbox=dict(type="L1Loss", loss_weight=1.0), 66 | ), 67 | mask_roi_extractor=dict( 68 | type="SingleRoIExtractor", 69 | roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), 70 | out_channels=256, 71 | featmap_strides=[4, 8, 16, 32], 72 | ), 73 | mask_head=dict( 74 | type="FCNMaskHead", 75 | num_convs=4, 76 | in_channels=256, 77 | conv_out_channels=256, 78 | num_classes=80, 79 | loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), 80 | ), 81 | ), 82 | # model training and testing settings 83 | train_cfg=dict( 84 | rpn=dict( 85 | assigner=dict( 86 | type="MaxIoUAssigner", 87 | pos_iou_thr=0.7, 88 | neg_iou_thr=0.3, 89 | min_pos_iou=0.3, 90 | match_low_quality=True, 91 | ignore_iof_thr=-1, 92 | ), 93 | sampler=dict( 94 | type="RandomSampler", 95 | num=256, 96 | pos_fraction=0.5, 97 | neg_pos_ub=-1, 98 | add_gt_as_proposals=False, 99 | ), 100 | allowed_border=-1, 101 | pos_weight=-1, 102 | debug=False, 103 | ), 104 | rpn_proposal=dict( 105 | nms_pre=2000, 106 | max_per_img=1000, 107 | nms=dict(type="nms", iou_threshold=0.7), 108 | min_bbox_size=0, 109 | ), 110 | rcnn=dict( 111 | assigner=dict( 112 | type="MaxIoUAssigner", 113 | pos_iou_thr=0.5, 114 | neg_iou_thr=0.5, 115 | min_pos_iou=0.5, 116 | match_low_quality=True, 117 | ignore_iof_thr=-1, 118 | ), 119 | sampler=dict( 120 | type="RandomSampler", 121 | num=512, 122 | pos_fraction=0.25, 123 | neg_pos_ub=-1, 124 | add_gt_as_proposals=True, 125 | ), 126 | mask_size=28, 127 | pos_weight=-1, 128 | debug=False, 129 | ), 130 | ), 131 | test_cfg=dict( 132 | rpn=dict( 133 | nms_pre=1000, 134 | max_per_img=1000, 135 | nms=dict(type="nms", iou_threshold=0.7), 136 | min_bbox_size=0, 137 | ), 138 | rcnn=dict( 139 | score_thr=0.05, 140 | nms=dict(type="nms", iou_threshold=0.5), 141 | max_per_img=100, 142 | mask_thr_binary=0.5, 143 | ), 144 | ), 145 | ), 146 | pixel_mean=[123.675, 116.280, 103.530], 147 | pixel_std=[58.395, 57.120, 57.375], 148 | ) 149 | 150 | dataloader.train.mapper.image_format = "RGB" # torchvision pretrained model 151 | train.init_checkpoint = None # pretrained model is loaded inside backbone 152 | -------------------------------------------------------------------------------- /configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml: -------------------------------------------------------------------------------- 1 | # A large PanopticFPN for demo purposes. 2 | # Use GN on backbone to support semantic seg. 3 | # Use Cascade + Deform Conv to improve localization. 4 | _BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" 5 | MODEL: 6 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" 7 | RESNETS: 8 | DEPTH: 101 9 | NORM: "GN" 10 | DEFORM_ON_PER_STAGE: [False, True, True, True] 11 | STRIDE_IN_1X1: False 12 | FPN: 13 | NORM: "GN" 14 | ROI_HEADS: 15 | NAME: CascadeROIHeads 16 | ROI_BOX_HEAD: 17 | CLS_AGNOSTIC_BBOX_REG: True 18 | ROI_MASK_HEAD: 19 | NORM: "GN" 20 | RPN: 21 | POST_NMS_TOPK_TRAIN: 2000 22 | SOLVER: 23 | STEPS: (105000, 125000) 24 | MAX_ITER: 135000 25 | IMS_PER_BATCH: 32 26 | BASE_LR: 0.04 27 | -------------------------------------------------------------------------------- /configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" 2 | MODEL: 3 | # Train from random initialization. 4 | WEIGHTS: "" 5 | # It makes sense to divide by STD when training from scratch 6 | # But it seems to make no difference on the results and C2's models didn't do this. 7 | # So we keep things consistent with C2. 8 | # PIXEL_STD: [57.375, 57.12, 58.395] 9 | MASK_ON: True 10 | BACKBONE: 11 | FREEZE_AT: 0 12 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 13 | # to learn what you need for training from scratch. 14 | -------------------------------------------------------------------------------- /configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" 2 | MODEL: 3 | PIXEL_STD: [57.375, 57.12, 58.395] 4 | WEIGHTS: "" 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | BACKBONE: 9 | FREEZE_AT: 0 10 | SOLVER: 11 | # 9x schedule 12 | IMS_PER_BATCH: 64 # 4x the standard 13 | STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k 14 | MAX_ITER: 202500 # 90k * 9 / 4 15 | BASE_LR: 0.08 16 | TEST: 17 | EVAL_PERIOD: 2500 18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 19 | # to learn what you need for training from scratch. 20 | -------------------------------------------------------------------------------- /configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml" 2 | MODEL: 3 | PIXEL_STD: [57.375, 57.12, 58.395] 4 | WEIGHTS: "" 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | BACKBONE: 9 | FREEZE_AT: 0 10 | SOLVER: 11 | # 9x schedule 12 | IMS_PER_BATCH: 64 # 4x the standard 13 | STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k 14 | MAX_ITER: 202500 # 90k * 9 / 4 15 | BASE_LR: 0.08 16 | TEST: 17 | EVAL_PERIOD: 2500 18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 19 | # to learn what you need for training from scratch. 20 | -------------------------------------------------------------------------------- /configs/Misc/semantic_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_train_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_panoptic_stuffonly",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | -------------------------------------------------------------------------------- /configs/Misc/torchvision_imagenet_R_50.py: -------------------------------------------------------------------------------- 1 | """ 2 | An example config file to train a ImageNet classifier with detectron2. 3 | Model and dataloader both come from torchvision. 4 | This shows how to use detectron2 as a general engine for any new models and tasks. 5 | 6 | To run, use the following command: 7 | 8 | python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \ 9 | --num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/ 10 | 11 | """ 12 | 13 | 14 | import torch 15 | from torch import nn 16 | from torch.nn import functional as F 17 | from omegaconf import OmegaConf 18 | import torchvision 19 | from torchvision.transforms import transforms as T 20 | from torchvision.models.resnet import ResNet, Bottleneck 21 | from fvcore.common.param_scheduler import MultiStepParamScheduler 22 | 23 | from detectron2.solver import WarmupParamScheduler 24 | from detectron2.solver.build import get_default_optimizer_params 25 | from detectron2.config import LazyCall as L 26 | from detectron2.model_zoo import get_config 27 | from detectron2.data.samplers import TrainingSampler, InferenceSampler 28 | from detectron2.evaluation import DatasetEvaluator 29 | from detectron2.utils import comm 30 | 31 | 32 | """ 33 | Note: Here we put reusable code (models, evaluation, data) together with configs just as a 34 | proof-of-concept, to easily demonstrate what's needed to train a ImageNet classifier in detectron2. 35 | Writing code in configs offers extreme flexibility but is often not a good engineering practice. 36 | In practice, you might want to put code in your project and import them instead. 37 | """ 38 | 39 | 40 | def build_data_loader(dataset, batch_size, num_workers, training=True): 41 | return torch.utils.data.DataLoader( 42 | dataset, 43 | sampler=(TrainingSampler if training else InferenceSampler)(len(dataset)), 44 | batch_size=batch_size, 45 | num_workers=num_workers, 46 | pin_memory=True, 47 | ) 48 | 49 | 50 | class ClassificationNet(nn.Module): 51 | def __init__(self, model: nn.Module): 52 | super().__init__() 53 | self.model = model 54 | 55 | @property 56 | def device(self): 57 | return list(self.model.parameters())[0].device 58 | 59 | def forward(self, inputs): 60 | image, label = inputs 61 | pred = self.model(image.to(self.device)) 62 | if self.training: 63 | label = label.to(self.device) 64 | return F.cross_entropy(pred, label) 65 | else: 66 | return pred 67 | 68 | 69 | class ClassificationAcc(DatasetEvaluator): 70 | def reset(self): 71 | self.corr = self.total = 0 72 | 73 | def process(self, inputs, outputs): 74 | image, label = inputs 75 | self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item() 76 | self.total += len(label) 77 | 78 | def evaluate(self): 79 | all_corr_total = comm.all_gather([self.corr, self.total]) 80 | corr = sum(x[0] for x in all_corr_total) 81 | total = sum(x[1] for x in all_corr_total) 82 | return {"accuracy": corr / total} 83 | 84 | 85 | # --- End of code that could be in a project and be imported 86 | 87 | 88 | dataloader = OmegaConf.create() 89 | dataloader.train = L(build_data_loader)( 90 | dataset=L(torchvision.datasets.ImageNet)( 91 | root="/path/to/imagenet", 92 | split="train", 93 | transform=L(T.Compose)( 94 | transforms=[ 95 | L(T.RandomResizedCrop)(size=224), 96 | L(T.RandomHorizontalFlip)(), 97 | T.ToTensor(), 98 | L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 99 | ] 100 | ), 101 | ), 102 | batch_size=256 // 8, 103 | num_workers=4, 104 | training=True, 105 | ) 106 | 107 | dataloader.test = L(build_data_loader)( 108 | dataset=L(torchvision.datasets.ImageNet)( 109 | root="${...train.dataset.root}", 110 | split="val", 111 | transform=L(T.Compose)( 112 | transforms=[ 113 | L(T.Resize)(size=256), 114 | L(T.CenterCrop)(size=224), 115 | T.ToTensor(), 116 | L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 117 | ] 118 | ), 119 | ), 120 | batch_size=256 // 8, 121 | num_workers=4, 122 | training=False, 123 | ) 124 | 125 | dataloader.evaluator = L(ClassificationAcc)() 126 | 127 | model = L(ClassificationNet)( 128 | model=(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True) 129 | ) 130 | 131 | 132 | optimizer = L(torch.optim.SGD)( 133 | params=L(get_default_optimizer_params)(), 134 | lr=0.1, 135 | momentum=0.9, 136 | weight_decay=1e-4, 137 | ) 138 | 139 | lr_multiplier = L(WarmupParamScheduler)( 140 | scheduler=L(MultiStepParamScheduler)( 141 | values=[1.0, 0.1, 0.01, 0.001], milestones=[30, 60, 90, 100] 142 | ), 143 | warmup_length=1 / 100, 144 | warmup_factor=0.1, 145 | ) 146 | 147 | 148 | train = get_config("common/train.py").train 149 | train.init_checkpoint = None 150 | train.max_iter = 100 * 1281167 // 256 151 | -------------------------------------------------------------------------------- /configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 20 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') 14 | TEST: ('voc_2007_test',) 15 | SOLVER: 16 | STEPS: (12000, 16000) 17 | MAX_ITER: 18000 # 17.4 epochs 18 | WARMUP_ITERS: 100 19 | -------------------------------------------------------------------------------- /configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 20 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') 14 | TEST: ('voc_2007_test',) 15 | SOLVER: 16 | STEPS: (12000, 16000) 17 | MAX_ITER: 18000 # 17.4 epochs 18 | WARMUP_ITERS: 100 19 | -------------------------------------------------------------------------------- /configs/common/README.md: -------------------------------------------------------------------------------- 1 | This directory provides definitions for a few common models, dataloaders, scheduler, 2 | and optimizers that are often used in training. 3 | The definition of these objects are provided in the form of lazy instantiation: 4 | their arguments can be edited by users before constructing the objects. 5 | 6 | They can be imported, or loaded by `model_zoo.get_config` API in users' own configs. 7 | -------------------------------------------------------------------------------- /configs/common/coco_schedule.py: -------------------------------------------------------------------------------- 1 | from fvcore.common.param_scheduler import MultiStepParamScheduler 2 | 3 | from detectron2.config import LazyCall as L 4 | from detectron2.solver import WarmupParamScheduler 5 | 6 | 7 | def default_X_scheduler(num_X): 8 | """ 9 | Returns the config for a default multi-step LR scheduler such as "1x", "3x", 10 | commonly referred to in papers, where every 1x has the total length of 1440k 11 | training images (~12 COCO epochs). LR is decayed twice at the end of training 12 | following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4. 13 | 14 | Args: 15 | num_X: a positive real number 16 | 17 | Returns: 18 | DictConfig: configs that define the multiplier for LR during training 19 | """ 20 | # total number of iterations assuming 16 batch size, using 1440000/16=90000 21 | total_steps_16bs = num_X * 90000 22 | 23 | if num_X <= 2: 24 | scheduler = L(MultiStepParamScheduler)( 25 | values=[1.0, 0.1, 0.01], 26 | # note that scheduler is scale-invariant. This is equivalent to 27 | # milestones=[6, 8, 9] 28 | milestones=[60000, 80000, 90000], 29 | ) 30 | else: 31 | scheduler = L(MultiStepParamScheduler)( 32 | values=[1.0, 0.1, 0.01], 33 | milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs], 34 | ) 35 | return L(WarmupParamScheduler)( 36 | scheduler=scheduler, 37 | warmup_length=1000 / total_steps_16bs, 38 | warmup_method="linear", 39 | warmup_factor=0.001, 40 | ) 41 | 42 | 43 | lr_multiplier_1x = default_X_scheduler(1) 44 | lr_multiplier_2x = default_X_scheduler(2) 45 | lr_multiplier_3x = default_X_scheduler(3) 46 | lr_multiplier_6x = default_X_scheduler(6) 47 | lr_multiplier_9x = default_X_scheduler(9) 48 | -------------------------------------------------------------------------------- /configs/common/data/coco.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | 3 | import detectron2.data.transforms as T 4 | from detectron2.config import LazyCall as L 5 | from detectron2.data import ( 6 | DatasetMapper, 7 | build_detection_test_loader, 8 | build_detection_train_loader, 9 | get_detection_dataset_dicts, 10 | ) 11 | from detectron2.evaluation import COCOEvaluator 12 | 13 | dataloader = OmegaConf.create() 14 | 15 | dataloader.train = L(build_detection_train_loader)( 16 | dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"), 17 | mapper=L(DatasetMapper)( 18 | is_train=True, 19 | augmentations=[ 20 | L(T.ResizeShortestEdge)( 21 | short_edge_length=(640, 672, 704, 736, 768, 800), 22 | sample_style="choice", 23 | max_size=1333, 24 | ), 25 | L(T.RandomFlip)(horizontal=True), 26 | ], 27 | image_format="BGR", 28 | use_instance_mask=True, 29 | ), 30 | total_batch_size=16, 31 | num_workers=4, 32 | ) 33 | 34 | dataloader.test = L(build_detection_test_loader)( 35 | dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False), 36 | mapper=L(DatasetMapper)( 37 | is_train=False, 38 | augmentations=[ 39 | L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333), 40 | ], 41 | image_format="${...train.mapper.image_format}", 42 | ), 43 | num_workers=4, 44 | ) 45 | 46 | dataloader.evaluator = L(COCOEvaluator)( 47 | dataset_name="${..test.dataset.names}", 48 | ) 49 | -------------------------------------------------------------------------------- /configs/common/data/coco_keypoint.py: -------------------------------------------------------------------------------- 1 | from detectron2.data.detection_utils import create_keypoint_hflip_indices 2 | 3 | from .coco import dataloader 4 | 5 | dataloader.train.dataset.min_keypoints = 1 6 | dataloader.train.dataset.names = "keypoints_coco_2017_train" 7 | dataloader.test.dataset.names = "keypoints_coco_2017_val" 8 | 9 | dataloader.train.mapper.update( 10 | use_instance_mask=False, 11 | use_keypoint=True, 12 | keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names), 13 | ) 14 | -------------------------------------------------------------------------------- /configs/common/data/coco_panoptic_separated.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.evaluation import ( 3 | COCOEvaluator, 4 | COCOPanopticEvaluator, 5 | DatasetEvaluators, 6 | SemSegEvaluator, 7 | ) 8 | 9 | from .coco import dataloader 10 | 11 | dataloader.train.dataset.names = "coco_2017_train_panoptic_separated" 12 | dataloader.train.dataset.filter_empty = False 13 | dataloader.test.dataset.names = "coco_2017_val_panoptic_separated" 14 | 15 | 16 | dataloader.evaluator = [ 17 | L(COCOEvaluator)( 18 | dataset_name="${...test.dataset.names}", 19 | ), 20 | L(SemSegEvaluator)( 21 | dataset_name="${...test.dataset.names}", 22 | ), 23 | L(COCOPanopticEvaluator)( 24 | dataset_name="${...test.dataset.names}", 25 | ), 26 | ] 27 | -------------------------------------------------------------------------------- /configs/common/models/cascade_rcnn.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.box_regression import Box2BoxTransform 4 | from detectron2.modeling.matcher import Matcher 5 | from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads 6 | 7 | from .mask_rcnn_fpn import model 8 | 9 | # arguments that don't exist for Cascade R-CNN 10 | [model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]] 11 | 12 | model.roi_heads.update( 13 | _target_=CascadeROIHeads, 14 | box_heads=[ 15 | L(FastRCNNConvFCHead)( 16 | input_shape=ShapeSpec(channels=256, height=7, width=7), 17 | conv_dims=[], 18 | fc_dims=[1024, 1024], 19 | ) 20 | for k in range(3) 21 | ], 22 | box_predictors=[ 23 | L(FastRCNNOutputLayers)( 24 | input_shape=ShapeSpec(channels=1024), 25 | test_score_thresh=0.05, 26 | box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)), 27 | cls_agnostic_bbox_reg=True, 28 | num_classes="${...num_classes}", 29 | ) 30 | for (w1, w2) in [(10, 5), (20, 10), (30, 15)] 31 | ], 32 | proposal_matchers=[ 33 | L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False) 34 | for th in [0.5, 0.6, 0.7] 35 | ], 36 | ) 37 | -------------------------------------------------------------------------------- /configs/common/models/fcos.py: -------------------------------------------------------------------------------- 1 | from detectron2.modeling.meta_arch.fcos import FCOS, FCOSHead 2 | 3 | from .retinanet import model 4 | 5 | model._target_ = FCOS 6 | 7 | del model.anchor_generator 8 | del model.box2box_transform 9 | del model.anchor_matcher 10 | del model.input_format 11 | 12 | # Use P5 instead of C5 to compute P6/P7 13 | # (Sec 2.2 of https://arxiv.org/abs/2006.09214) 14 | model.backbone.top_block.in_feature = "p5" 15 | model.backbone.top_block.in_channels = 256 16 | 17 | # New score threshold determined based on sqrt(cls_score * centerness) 18 | model.test_score_thresh = 0.2 19 | model.test_nms_thresh = 0.6 20 | 21 | model.head._target_ = FCOSHead 22 | del model.head.num_anchors 23 | model.head.norm = "GN" 24 | -------------------------------------------------------------------------------- /configs/common/models/keypoint_rcnn_fpn.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.poolers import ROIPooler 4 | from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead 5 | 6 | from .mask_rcnn_fpn import model 7 | 8 | [model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]] 9 | 10 | model.roi_heads.update( 11 | num_classes=1, 12 | keypoint_in_features=["p2", "p3", "p4", "p5"], 13 | keypoint_pooler=L(ROIPooler)( 14 | output_size=14, 15 | scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), 16 | sampling_ratio=0, 17 | pooler_type="ROIAlignV2", 18 | ), 19 | keypoint_head=L(KRCNNConvDeconvUpsampleHead)( 20 | input_shape=ShapeSpec(channels=256, width=14, height=14), 21 | num_keypoints=17, 22 | conv_dims=[512] * 8, 23 | loss_normalizer="visible", 24 | ), 25 | ) 26 | 27 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. 28 | # 1000 proposals per-image is found to hurt box AP. 29 | # Therefore we increase it to 1500 per-image. 30 | model.proposal_generator.post_nms_topk = (1500, 1000) 31 | 32 | # Keypoint AP degrades (though box AP improves) when using plain L1 loss 33 | model.roi_heads.box_predictor.smooth_l1_beta = 0.5 34 | -------------------------------------------------------------------------------- /configs/common/models/mask_rcnn_c4.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.meta_arch import GeneralizedRCNN 4 | from detectron2.modeling.anchor_generator import DefaultAnchorGenerator 5 | from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet 6 | from detectron2.modeling.box_regression import Box2BoxTransform 7 | from detectron2.modeling.matcher import Matcher 8 | from detectron2.modeling.poolers import ROIPooler 9 | from detectron2.modeling.proposal_generator import RPN, StandardRPNHead 10 | from detectron2.modeling.roi_heads import ( 11 | FastRCNNOutputLayers, 12 | MaskRCNNConvUpsampleHead, 13 | Res5ROIHeads, 14 | ) 15 | 16 | model = L(GeneralizedRCNN)( 17 | backbone=L(ResNet)( 18 | stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), 19 | stages=L(ResNet.make_default_stages)( 20 | depth=50, 21 | stride_in_1x1=True, 22 | norm="FrozenBN", 23 | ), 24 | out_features=["res4"], 25 | ), 26 | proposal_generator=L(RPN)( 27 | in_features=["res4"], 28 | head=L(StandardRPNHead)(in_channels=1024, num_anchors=15), 29 | anchor_generator=L(DefaultAnchorGenerator)( 30 | sizes=[[32, 64, 128, 256, 512]], 31 | aspect_ratios=[0.5, 1.0, 2.0], 32 | strides=[16], 33 | offset=0.0, 34 | ), 35 | anchor_matcher=L(Matcher)( 36 | thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True 37 | ), 38 | box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), 39 | batch_size_per_image=256, 40 | positive_fraction=0.5, 41 | pre_nms_topk=(12000, 6000), 42 | post_nms_topk=(2000, 1000), 43 | nms_thresh=0.7, 44 | ), 45 | roi_heads=L(Res5ROIHeads)( 46 | num_classes=80, 47 | batch_size_per_image=512, 48 | positive_fraction=0.25, 49 | proposal_matcher=L(Matcher)( 50 | thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False 51 | ), 52 | in_features=["res4"], 53 | pooler=L(ROIPooler)( 54 | output_size=14, 55 | scales=(1.0 / 16,), 56 | sampling_ratio=0, 57 | pooler_type="ROIAlignV2", 58 | ), 59 | res5=L(ResNet.make_stage)( 60 | block_class=BottleneckBlock, 61 | num_blocks=3, 62 | stride_per_block=[2, 1, 1], 63 | in_channels=1024, 64 | bottleneck_channels=512, 65 | out_channels=2048, 66 | norm="FrozenBN", 67 | stride_in_1x1=True, 68 | ), 69 | box_predictor=L(FastRCNNOutputLayers)( 70 | input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1), 71 | test_score_thresh=0.05, 72 | box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)), 73 | num_classes="${..num_classes}", 74 | ), 75 | mask_head=L(MaskRCNNConvUpsampleHead)( 76 | input_shape=L(ShapeSpec)( 77 | channels="${...res5.out_channels}", 78 | width="${...pooler.output_size}", 79 | height="${...pooler.output_size}", 80 | ), 81 | num_classes="${..num_classes}", 82 | conv_dims=[256], 83 | ), 84 | ), 85 | pixel_mean=[103.530, 116.280, 123.675], 86 | pixel_std=[1.0, 1.0, 1.0], 87 | input_format="BGR", 88 | ) 89 | -------------------------------------------------------------------------------- /configs/common/models/mask_rcnn_fpn.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.meta_arch import GeneralizedRCNN 4 | from detectron2.modeling.anchor_generator import DefaultAnchorGenerator 5 | from detectron2.modeling.backbone.fpn import LastLevelMaxPool 6 | from detectron2.modeling.backbone import BasicStem, FPN, ResNet 7 | from detectron2.modeling.box_regression import Box2BoxTransform 8 | from detectron2.modeling.matcher import Matcher 9 | from detectron2.modeling.poolers import ROIPooler 10 | from detectron2.modeling.proposal_generator import RPN, StandardRPNHead 11 | from detectron2.modeling.roi_heads import ( 12 | StandardROIHeads, 13 | FastRCNNOutputLayers, 14 | MaskRCNNConvUpsampleHead, 15 | FastRCNNConvFCHead, 16 | ) 17 | 18 | model = L(GeneralizedRCNN)( 19 | backbone=L(FPN)( 20 | bottom_up=L(ResNet)( 21 | stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), 22 | stages=L(ResNet.make_default_stages)( 23 | depth=50, 24 | stride_in_1x1=True, 25 | norm="FrozenBN", 26 | ), 27 | out_features=["res2", "res3", "res4", "res5"], 28 | ), 29 | in_features="${.bottom_up.out_features}", 30 | out_channels=256, 31 | top_block=L(LastLevelMaxPool)(), 32 | ), 33 | proposal_generator=L(RPN)( 34 | in_features=["p2", "p3", "p4", "p5", "p6"], 35 | head=L(StandardRPNHead)(in_channels=256, num_anchors=3), 36 | anchor_generator=L(DefaultAnchorGenerator)( 37 | sizes=[[32], [64], [128], [256], [512]], 38 | aspect_ratios=[0.5, 1.0, 2.0], 39 | strides=[4, 8, 16, 32, 64], 40 | offset=0.0, 41 | ), 42 | anchor_matcher=L(Matcher)( 43 | thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True 44 | ), 45 | box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), 46 | batch_size_per_image=256, 47 | positive_fraction=0.5, 48 | pre_nms_topk=(2000, 1000), 49 | post_nms_topk=(1000, 1000), 50 | nms_thresh=0.7, 51 | ), 52 | roi_heads=L(StandardROIHeads)( 53 | num_classes=80, 54 | batch_size_per_image=512, 55 | positive_fraction=0.25, 56 | proposal_matcher=L(Matcher)( 57 | thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False 58 | ), 59 | box_in_features=["p2", "p3", "p4", "p5"], 60 | box_pooler=L(ROIPooler)( 61 | output_size=7, 62 | scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), 63 | sampling_ratio=0, 64 | pooler_type="ROIAlignV2", 65 | ), 66 | box_head=L(FastRCNNConvFCHead)( 67 | input_shape=ShapeSpec(channels=256, height=7, width=7), 68 | conv_dims=[], 69 | fc_dims=[1024, 1024], 70 | ), 71 | box_predictor=L(FastRCNNOutputLayers)( 72 | input_shape=ShapeSpec(channels=1024), 73 | test_score_thresh=0.05, 74 | box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)), 75 | num_classes="${..num_classes}", 76 | ), 77 | mask_in_features=["p2", "p3", "p4", "p5"], 78 | mask_pooler=L(ROIPooler)( 79 | output_size=14, 80 | scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), 81 | sampling_ratio=0, 82 | pooler_type="ROIAlignV2", 83 | ), 84 | mask_head=L(MaskRCNNConvUpsampleHead)( 85 | input_shape=ShapeSpec(channels=256, width=14, height=14), 86 | num_classes="${..num_classes}", 87 | conv_dims=[256, 256, 256, 256, 256], 88 | ), 89 | ), 90 | pixel_mean=[103.530, 116.280, 123.675], 91 | pixel_std=[1.0, 1.0, 1.0], 92 | input_format="BGR", 93 | ) 94 | -------------------------------------------------------------------------------- /configs/common/models/panoptic_fpn.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling import PanopticFPN 4 | from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead 5 | 6 | from .mask_rcnn_fpn import model 7 | 8 | model._target_ = PanopticFPN 9 | model.sem_seg_head = L(SemSegFPNHead)( 10 | input_shape={ 11 | f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}") 12 | for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32]) 13 | }, 14 | ignore_value=255, 15 | num_classes=54, # COCO stuff + 1 16 | conv_dims=128, 17 | common_stride=4, 18 | loss_weight=0.5, 19 | norm="GN", 20 | ) 21 | -------------------------------------------------------------------------------- /configs/common/models/retinanet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from detectron2.config import LazyCall as L 4 | from detectron2.layers import ShapeSpec 5 | from detectron2.modeling.meta_arch import RetinaNet 6 | from detectron2.modeling.anchor_generator import DefaultAnchorGenerator 7 | from detectron2.modeling.backbone.fpn import LastLevelP6P7 8 | from detectron2.modeling.backbone import BasicStem, FPN, ResNet 9 | from detectron2.modeling.box_regression import Box2BoxTransform 10 | from detectron2.modeling.matcher import Matcher 11 | from detectron2.modeling.meta_arch.retinanet import RetinaNetHead 12 | 13 | model = L(RetinaNet)( 14 | backbone=L(FPN)( 15 | bottom_up=L(ResNet)( 16 | stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), 17 | stages=L(ResNet.make_default_stages)( 18 | depth=50, 19 | stride_in_1x1=True, 20 | norm="FrozenBN", 21 | ), 22 | out_features=["res3", "res4", "res5"], 23 | ), 24 | in_features=["res3", "res4", "res5"], 25 | out_channels=256, 26 | top_block=L(LastLevelP6P7)(in_channels=2048, out_channels="${..out_channels}"), 27 | ), 28 | head=L(RetinaNetHead)( 29 | # Shape for each input feature map 30 | input_shape=[ShapeSpec(channels=256)] * 5, 31 | num_classes="${..num_classes}", 32 | conv_dims=[256, 256, 256, 256], 33 | prior_prob=0.01, 34 | num_anchors=9, 35 | ), 36 | anchor_generator=L(DefaultAnchorGenerator)( 37 | sizes=[[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]], 38 | aspect_ratios=[0.5, 1.0, 2.0], 39 | strides=[8, 16, 32, 64, 128], 40 | offset=0.0, 41 | ), 42 | box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), 43 | anchor_matcher=L(Matcher)( 44 | thresholds=[0.4, 0.5], labels=[0, -1, 1], allow_low_quality_matches=True 45 | ), 46 | num_classes=80, 47 | head_in_features=["p3", "p4", "p5", "p6", "p7"], 48 | focal_loss_alpha=0.25, 49 | focal_loss_gamma=2.0, 50 | pixel_mean=[103.530, 116.280, 123.675], 51 | pixel_std=[1.0, 1.0, 1.0], 52 | input_format="BGR", 53 | ) 54 | -------------------------------------------------------------------------------- /configs/common/optim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from detectron2.config import LazyCall as L 4 | from detectron2.solver.build import get_default_optimizer_params 5 | 6 | SGD = L(torch.optim.SGD)( 7 | params=L(get_default_optimizer_params)( 8 | # params.model is meant to be set to the model object, before instantiating 9 | # the optimizer. 10 | weight_decay_norm=0.0 11 | ), 12 | lr=0.02, 13 | momentum=0.9, 14 | weight_decay=1e-4, 15 | ) 16 | -------------------------------------------------------------------------------- /configs/common/train.py: -------------------------------------------------------------------------------- 1 | # Common training-related configs that are designed for "tools/lazyconfig_train_net.py" 2 | # You can use your own instead, together with your own train_net.py 3 | train = dict( 4 | output_dir="./output", 5 | init_checkpoint="", 6 | max_iter=90000, 7 | amp=dict(enabled=False), # options for Automatic Mixed Precision 8 | ddp=dict( # options for DistributedDataParallel 9 | broadcast_buffers=False, 10 | find_unused_parameters=False, 11 | fp16_compression=False, 12 | ), 13 | checkpointer=dict(period=5000, max_to_keep=100), # options for PeriodicCheckpointer 14 | eval_period=5000, 15 | log_period=20, 16 | device="cuda" 17 | # ... 18 | ) 19 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | model, 5 | optimizer, 6 | train, 7 | ) 8 | 9 | model.backbone.bottom_up.stages.depth = 101 10 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_R_101_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | model, 5 | optimizer, 6 | train, 7 | ) 8 | 9 | train.max_iter *= 2 # 100ep -> 200ep 10 | 11 | lr_multiplier.scheduler.milestones = [ 12 | milestone * 2 for milestone in lr_multiplier.scheduler.milestones 13 | ] 14 | lr_multiplier.scheduler.num_updates = train.max_iter 15 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_R_101_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | model, 5 | optimizer, 6 | train, 7 | ) 8 | 9 | train.max_iter *= 4 # 100ep -> 400ep 10 | 11 | lr_multiplier.scheduler.milestones = [ 12 | milestone * 4 for milestone in lr_multiplier.scheduler.milestones 13 | ] 14 | lr_multiplier.scheduler.num_updates = train.max_iter 15 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py: -------------------------------------------------------------------------------- 1 | import detectron2.data.transforms as T 2 | from detectron2.config.lazy import LazyCall as L 3 | from detectron2.layers.batch_norm import NaiveSyncBatchNorm 4 | from detectron2.solver import WarmupParamScheduler 5 | from fvcore.common.param_scheduler import MultiStepParamScheduler 6 | 7 | from ..common.data.coco import dataloader 8 | from ..common.models.mask_rcnn_fpn import model 9 | from ..common.optim import SGD as optimizer 10 | from ..common.train import train 11 | 12 | # train from scratch 13 | train.init_checkpoint = "" 14 | train.amp.enabled = True 15 | train.ddp.fp16_compression = True 16 | model.backbone.bottom_up.freeze_at = 0 17 | 18 | # SyncBN 19 | # fmt: off 20 | model.backbone.bottom_up.stem.norm = \ 21 | model.backbone.bottom_up.stages.norm = \ 22 | model.backbone.norm = "SyncBN" 23 | 24 | # Using NaiveSyncBatchNorm becase heads may have empty input. That is not supported by 25 | # torch.nn.SyncBatchNorm. We can remove this after 26 | # https://github.com/pytorch/pytorch/issues/36530 is fixed. 27 | model.roi_heads.box_head.conv_norm = \ 28 | model.roi_heads.mask_head.conv_norm = lambda c: NaiveSyncBatchNorm(c, 29 | stats_mode="N") 30 | # fmt: on 31 | 32 | # 2conv in RPN: 33 | # https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/modeling/architecture/heads.py#L95-L97 # noqa: E501, B950 34 | model.proposal_generator.head.conv_dims = [-1, -1] 35 | 36 | # 4conv1fc box head 37 | model.roi_heads.box_head.conv_dims = [256, 256, 256, 256] 38 | model.roi_heads.box_head.fc_dims = [1024] 39 | 40 | # resize_and_crop_image in: 41 | # https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/utils/input_utils.py#L127 # noqa: E501, B950 42 | image_size = 1024 43 | dataloader.train.mapper.augmentations = [ 44 | L(T.ResizeScale)( 45 | min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size 46 | ), 47 | L(T.FixedSizeCrop)(crop_size=(image_size, image_size)), 48 | L(T.RandomFlip)(horizontal=True), 49 | ] 50 | 51 | # recompute boxes due to cropping 52 | dataloader.train.mapper.recompute_boxes = True 53 | 54 | # larger batch-size. 55 | dataloader.train.total_batch_size = 64 56 | 57 | # Equivalent to 100 epochs. 58 | # 100 ep = 184375 iters * 64 images/iter / 118000 images/ep 59 | train.max_iter = 184375 60 | 61 | lr_multiplier = L(WarmupParamScheduler)( 62 | scheduler=L(MultiStepParamScheduler)( 63 | values=[1.0, 0.1, 0.01], 64 | milestones=[163889, 177546], 65 | num_updates=train.max_iter, 66 | ), 67 | warmup_length=500 / train.max_iter, 68 | warmup_factor=0.067, 69 | ) 70 | 71 | optimizer.lr = 0.1 72 | optimizer.weight_decay = 4e-5 73 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | model, 5 | optimizer, 6 | train, 7 | ) 8 | 9 | train.max_iter *= 2 # 100ep -> 200ep 10 | 11 | lr_multiplier.scheduler.milestones = [ 12 | milestone * 2 for milestone in lr_multiplier.scheduler.milestones 13 | ] 14 | lr_multiplier.scheduler.num_updates = train.max_iter 15 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | model, 5 | optimizer, 6 | train, 7 | ) 8 | 9 | train.max_iter *= 4 # 100ep -> 400ep 10 | 11 | lr_multiplier.scheduler.milestones = [ 12 | milestone * 4 for milestone in lr_multiplier.scheduler.milestones 13 | ] 14 | lr_multiplier.scheduler.num_updates = train.max_iter 15 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | model, 5 | optimizer, 6 | train, 7 | ) 8 | 9 | train.max_iter //= 2 # 100ep -> 50ep 10 | 11 | lr_multiplier.scheduler.milestones = [ 12 | milestone // 2 for milestone in lr_multiplier.scheduler.milestones 13 | ] 14 | lr_multiplier.scheduler.num_updates = train.max_iter 15 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | model, 5 | optimizer, 6 | train, 7 | ) 8 | from detectron2.config import LazyCall as L 9 | from detectron2.modeling.backbone import RegNet 10 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock 11 | 12 | # Config source: 13 | # https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py # noqa 14 | model.backbone.bottom_up = L(RegNet)( 15 | stem_class=SimpleStem, 16 | stem_width=32, 17 | block_class=ResBottleneckBlock, 18 | depth=23, 19 | w_a=38.65, 20 | w_0=96, 21 | w_m=2.43, 22 | group_width=40, 23 | norm="SyncBN", 24 | out_features=["s1", "s2", "s3", "s4"], 25 | ) 26 | model.pixel_std = [57.375, 57.120, 58.395] 27 | 28 | # RegNets benefit from enabling cudnn benchmark mode 29 | train.cudnn_benchmark = True 30 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | model, 5 | optimizer, 6 | train, 7 | ) 8 | 9 | train.max_iter *= 2 # 100ep -> 200ep 10 | 11 | lr_multiplier.scheduler.milestones = [ 12 | milestone * 2 for milestone in lr_multiplier.scheduler.milestones 13 | ] 14 | lr_multiplier.scheduler.num_updates = train.max_iter 15 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | model, 5 | optimizer, 6 | train, 7 | ) 8 | 9 | train.max_iter *= 4 # 100ep -> 400ep 10 | 11 | lr_multiplier.scheduler.milestones = [ 12 | milestone * 4 for milestone in lr_multiplier.scheduler.milestones 13 | ] 14 | lr_multiplier.scheduler.num_updates = train.max_iter 15 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | model, 5 | optimizer, 6 | train, 7 | ) 8 | from detectron2.config import LazyCall as L 9 | from detectron2.modeling.backbone import RegNet 10 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock 11 | 12 | # Config source: 13 | # https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py # noqa 14 | model.backbone.bottom_up = L(RegNet)( 15 | stem_class=SimpleStem, 16 | stem_width=32, 17 | block_class=ResBottleneckBlock, 18 | depth=22, 19 | w_a=31.41, 20 | w_0=96, 21 | w_m=2.24, 22 | group_width=64, 23 | se_ratio=0.25, 24 | norm="SyncBN", 25 | out_features=["s1", "s2", "s3", "s4"], 26 | ) 27 | model.pixel_std = [57.375, 57.120, 58.395] 28 | 29 | # RegNets benefit from enabling cudnn benchmark mode 30 | train.cudnn_benchmark = True 31 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | model, 5 | optimizer, 6 | train, 7 | ) 8 | 9 | train.max_iter *= 2 # 100ep -> 200ep 10 | 11 | lr_multiplier.scheduler.milestones = [ 12 | milestone * 2 for milestone in lr_multiplier.scheduler.milestones 13 | ] 14 | lr_multiplier.scheduler.num_updates = train.max_iter 15 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | model, 5 | optimizer, 6 | train, 7 | ) 8 | 9 | train.max_iter *= 4 # 100ep -> 400ep 10 | 11 | lr_multiplier.scheduler.milestones = [ 12 | milestone * 4 for milestone in lr_multiplier.scheduler.milestones 13 | ] 14 | lr_multiplier.scheduler.num_updates = train.max_iter 15 | -------------------------------------------------------------------------------- /configs/quick_schedules/README.md: -------------------------------------------------------------------------------- 1 | These are quick configs for performance or accuracy regression tracking purposes. 2 | 3 | * `*instance_test.yaml`: can train on 2 GPUs. They are used to test whether the training can 4 | successfully finish. They are not expected to produce reasonable training results. 5 | * `*inference_acc_test.yaml`: They should be run using `--eval-only`. They run inference using pre-trained models and verify 6 | the results are as expected. 7 | * `*training_acc_test.yaml`: They should be trained on 8 GPUs. They finish in about an hour and verify the training accuracy 8 | is within the normal range. 9 | -------------------------------------------------------------------------------- /configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" 2 | DATASETS: 3 | TRAIN: ("coco_2017_val_100",) 4 | TEST: ("coco_2017_val_100",) 5 | SOLVER: 6 | BASE_LR: 0.005 7 | STEPS: (30,) 8 | MAX_ITER: 40 9 | IMS_PER_BATCH: 4 10 | DATALOADER: 11 | NUM_WORKERS: 2 12 | -------------------------------------------------------------------------------- /configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 7 | TEST: ("coco_2017_val_100",) 8 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 9 | SOLVER: 10 | BASE_LR: 0.005 11 | STEPS: (30,) 12 | MAX_ITER: 40 13 | IMS_PER_BATCH: 4 14 | DATALOADER: 15 | NUM_WORKERS: 2 16 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" 4 | DATASETS: 5 | TEST: ("keypoints_coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | ROI_HEADS: 6 | NUM_CLASSES: 1 7 | DATASETS: 8 | TRAIN: ("keypoints_coco_2017_val_100",) 9 | TEST: ("keypoints_coco_2017_val_100",) 10 | SOLVER: 11 | BASE_LR: 0.005 12 | STEPS: (30,) 13 | MAX_ITER: 40 14 | IMS_PER_BATCH: 4 15 | DATALOADER: 16 | NUM_WORKERS: 2 17 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | NUM_CLASSES: 1 10 | ROI_KEYPOINT_HEAD: 11 | POOLER_RESOLUTION: 14 12 | POOLER_SAMPLING_RATIO: 2 13 | NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False 14 | LOSS_WEIGHT: 4.0 15 | ROI_BOX_HEAD: 16 | SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss 17 | RPN: 18 | SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss 19 | DATASETS: 20 | TRAIN: ("keypoints_coco_2017_val",) 21 | TEST: ("keypoints_coco_2017_val",) 22 | INPUT: 23 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 24 | SOLVER: 25 | WARMUP_FACTOR: 0.33333333 26 | WARMUP_ITERS: 100 27 | STEPS: (5500, 5800) 28 | MAX_ITER: 6000 29 | TEST: 30 | EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]] 31 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | NUM_CLASSES: 1 10 | ROI_KEYPOINT_HEAD: 11 | POOLER_RESOLUTION: 14 12 | POOLER_SAMPLING_RATIO: 2 13 | ROI_BOX_HEAD: 14 | SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss 15 | RPN: 16 | SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss 17 | DATASETS: 18 | TRAIN: ("keypoints_coco_2017_val",) 19 | TEST: ("keypoints_coco_2017_val",) 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | SOLVER: 23 | WARMUP_FACTOR: 0.33333333 24 | WARMUP_ITERS: 100 25 | STEPS: (5500, 5800) 26 | MAX_ITER: 6000 27 | TEST: 28 | EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] 29 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.001 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | CLIP_GRADIENTS: 14 | ENABLED: True 15 | CLIP_TYPE: "value" 16 | CLIP_VALUE: 1.0 17 | DATALOADER: 18 | NUM_WORKERS: 2 19 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.001 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | ROI_HEADS: 5 | BATCH_SIZE_PER_IMAGE: 256 6 | MASK_ON: True 7 | DATASETS: 8 | TRAIN: ("coco_2017_val",) 9 | TEST: ("coco_2017_val",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (600,) 12 | MAX_SIZE_TRAIN: 1000 13 | MIN_SIZE_TEST: 800 14 | MAX_SIZE_TEST: 1000 15 | SOLVER: 16 | IMS_PER_BATCH: 8 # base uses 16 17 | WARMUP_FACTOR: 0.33333 18 | WARMUP_ITERS: 100 19 | STEPS: (11000, 11600) 20 | MAX_ITER: 12000 21 | TEST: 22 | EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] 23 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]] 8 | AUG: 9 | ENABLED: True 10 | MIN_SIZES: (700, 800) # to save some time 11 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.005 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | TRAIN_ON_PRED_BOXES: True 5 | TEST: 6 | EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]] 7 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | ROI_HEADS: 5 | BATCH_SIZE_PER_IMAGE: 256 6 | MASK_ON: True 7 | DATASETS: 8 | TRAIN: ("coco_2017_val",) 9 | TEST: ("coco_2017_val",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (600,) 12 | MAX_SIZE_TRAIN: 1000 13 | MIN_SIZE_TEST: 800 14 | MAX_SIZE_TEST: 1000 15 | SOLVER: 16 | WARMUP_FACTOR: 0.3333333 17 | WARMUP_ITERS: 100 18 | STEPS: (5500, 5800) 19 | MAX_ITER: 6000 20 | TEST: 21 | EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]] 22 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100_panoptic_separated",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: True 6 | RESNETS: 7 | DEPTH: 50 8 | SEM_SEG_HEAD: 9 | LOSS_WEIGHT: 0.5 10 | DATASETS: 11 | TRAIN: ("coco_2017_val_100_panoptic_separated",) 12 | TEST: ("coco_2017_val_100_panoptic_separated",) 13 | SOLVER: 14 | BASE_LR: 0.005 15 | STEPS: (30,) 16 | MAX_ITER: 40 17 | IMS_PER_BATCH: 4 18 | DATALOADER: 19 | NUM_WORKERS: 1 20 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: True 6 | RESNETS: 7 | DEPTH: 50 8 | SEM_SEG_HEAD: 9 | LOSS_WEIGHT: 0.5 10 | DATASETS: 11 | TRAIN: ("coco_2017_val_panoptic_separated",) 12 | TEST: ("coco_2017_val_panoptic_separated",) 13 | SOLVER: 14 | BASE_LR: 0.01 15 | WARMUP_FACTOR: 0.001 16 | WARMUP_ITERS: 500 17 | STEPS: (5500,) 18 | MAX_ITER: 7000 19 | TEST: 20 | EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 0.8]] 21 | -------------------------------------------------------------------------------- /configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | TEST: ("coco_2017_val_100",) 7 | SOLVER: 8 | BASE_LR: 0.005 9 | STEPS: (30,) 10 | MAX_ITER: 40 11 | IMS_PER_BATCH: 4 12 | DATALOADER: 13 | NUM_WORKERS: 2 14 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | TEST: ("coco_2017_val_100",) 7 | SOLVER: 8 | STEPS: (30,) 9 | MAX_ITER: 40 10 | BASE_LR: 0.005 11 | IMS_PER_BATCH: 4 12 | DATALOADER: 13 | NUM_WORKERS: 2 14 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TEST: ("coco_2017_val_100_panoptic_stuffonly",) 9 | TEST: 10 | EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] 11 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_100_panoptic_stuffonly",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | SOLVER: 13 | BASE_LR: 0.005 14 | STEPS: (30,) 15 | MAX_ITER: 40 16 | IMS_PER_BATCH: 4 17 | DATALOADER: 18 | NUM_WORKERS: 2 19 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_val_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_panoptic_stuffonly",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WARMUP_FACTOR: 0.001 13 | WARMUP_ITERS: 300 14 | STEPS: (5500,) 15 | MAX_ITER: 7000 16 | TEST: 17 | EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] 18 | INPUT: 19 | # no scale augmentation 20 | MIN_SIZE_TRAIN: (800, ) 21 | -------------------------------------------------------------------------------- /export_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | The script was taken from Detectron2 repo: 3 | https://github.com/facebookresearch/detectron2/blob/v0.6/tools/deploy/export_model.py 4 | 5 | We might need to update the script if it's no longer compatible with the later releases of detectron2 6 | """ 7 | 8 | # Copyright (c) Facebook, Inc. and its affiliates. 9 | import argparse 10 | import os 11 | from typing import Dict, List, Tuple 12 | import torch 13 | from torch import Tensor, nn 14 | 15 | import detectron2.data.transforms as T 16 | from detectron2.checkpoint import DetectionCheckpointer 17 | from detectron2.config import get_cfg 18 | from detectron2.data import build_detection_test_loader, detection_utils 19 | from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format 20 | from detectron2.export import ( 21 | Caffe2Tracer, 22 | TracingAdapter, 23 | add_export_config, 24 | dump_torchscript_IR, 25 | scripting_with_instances, 26 | ) 27 | from detectron2.modeling import GeneralizedRCNN, RetinaNet, build_model 28 | from detectron2.modeling.postprocessing import detector_postprocess 29 | from detectron2.projects.point_rend import add_pointrend_config 30 | from detectron2.structures import Boxes 31 | from detectron2.utils.env import TORCH_VERSION 32 | from detectron2.utils.file_io import PathManager 33 | from detectron2.utils.logger import setup_logger 34 | import torch 35 | from unittest.mock import patch 36 | from functools import wraps 37 | 38 | 39 | def patch_torch_stack(func): 40 | orig_stack = torch.stack 41 | 42 | def new_stack(*args, **kwargs): 43 | return orig_stack(*args, **kwargs).to('cuda') 44 | 45 | @wraps(func) 46 | def wrapper(*args, **kwargs): 47 | with patch("torch.stack", side_effect=new_stack, wraps=orig_stack): 48 | return func(*args, **kwargs) 49 | return wrapper 50 | 51 | 52 | def setup_cfg(args): 53 | cfg = get_cfg() 54 | # cuda context is initialized before creating dataloader, so we don't fork anymore 55 | cfg.DATALOADER.NUM_WORKERS = 0 56 | cfg = add_export_config(cfg) 57 | add_pointrend_config(cfg) 58 | cfg.merge_from_file(args.config_file) 59 | cfg.merge_from_list(args.opts) 60 | cfg.freeze() 61 | return cfg 62 | 63 | 64 | def export_caffe2_tracing(cfg, torch_model, inputs): 65 | tracer = Caffe2Tracer(cfg, torch_model, inputs) 66 | if args.format == "caffe2": 67 | caffe2_model = tracer.export_caffe2() 68 | caffe2_model.save_protobuf(args.output) 69 | # draw the caffe2 graph 70 | caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=inputs) 71 | return caffe2_model 72 | elif args.format == "onnx": 73 | import onnx 74 | 75 | onnx_model = tracer.export_onnx() 76 | onnx.save(onnx_model, os.path.join(args.output, "model.onnx")) 77 | elif args.format == "torchscript": 78 | ts_model = tracer.export_torchscript() 79 | with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f: 80 | torch.jit.save(ts_model, f) 81 | dump_torchscript_IR(ts_model, args.output) 82 | 83 | 84 | # experimental. API not yet final 85 | @patch_torch_stack 86 | def export_scripting(torch_model): 87 | assert TORCH_VERSION >= (1, 8) 88 | fields = { 89 | "proposal_boxes": Boxes, 90 | "objectness_logits": Tensor, 91 | "pred_boxes": Boxes, 92 | "scores": Tensor, 93 | "pred_classes": Tensor, 94 | "pred_masks": Tensor, 95 | "pred_keypoints": torch.Tensor, 96 | "pred_keypoint_heatmaps": torch.Tensor, 97 | } 98 | assert args.format == "torchscript", "Scripting only supports torchscript format." 99 | 100 | class ScriptableAdapterBase(nn.Module): 101 | # Use this adapter to workaround https://github.com/pytorch/pytorch/issues/46944 102 | # by not retuning instances but dicts. Otherwise the exported model is not deployable 103 | def __init__(self): 104 | super().__init__() 105 | self.model = torch_model 106 | self.eval() 107 | 108 | if isinstance(torch_model, GeneralizedRCNN): 109 | 110 | class ScriptableAdapter(ScriptableAdapterBase): 111 | def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]: 112 | instances = self.model.inference(inputs, do_postprocess=False) 113 | return [i.get_fields() for i in instances] 114 | 115 | else: 116 | 117 | class ScriptableAdapter(ScriptableAdapterBase): 118 | def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]: 119 | instances = self.model(inputs) 120 | return [i.get_fields() for i in instances] 121 | 122 | ts_model = scripting_with_instances(ScriptableAdapter(), fields) 123 | with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f: 124 | torch.jit.save(ts_model, f) 125 | dump_torchscript_IR(ts_model, args.output) 126 | # TODO inference in Python now missing postprocessing glue code 127 | return None 128 | 129 | 130 | # experimental. API not yet final 131 | @patch_torch_stack 132 | def export_tracing(torch_model, inputs): 133 | assert TORCH_VERSION >= (1, 8) 134 | image = inputs[0]["image"] 135 | inputs = [{"image": image}] # remove other unused keys 136 | 137 | if isinstance(torch_model, GeneralizedRCNN): 138 | 139 | def inference(model, inputs): 140 | # use do_postprocess=False so it returns ROI mask 141 | inst = model.inference(inputs, do_postprocess=False)[0] 142 | return [{"instances": inst}] 143 | 144 | else: 145 | inference = None # assume that we just call the model directly 146 | 147 | traceable_model = TracingAdapter(torch_model, inputs, inference) 148 | 149 | if args.format == "torchscript": 150 | ts_model = torch.jit.trace(traceable_model, (image,)) 151 | with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f: 152 | torch.jit.save(ts_model, f) 153 | dump_torchscript_IR(ts_model, args.output) 154 | elif args.format == "onnx": 155 | with PathManager.open(os.path.join(args.output, "model.onnx"), "wb") as f: 156 | torch.onnx.export(traceable_model, (image,), f, opset_version=11) 157 | logger.info("Inputs schema: " + str(traceable_model.inputs_schema)) 158 | logger.info("Outputs schema: " + str(traceable_model.outputs_schema)) 159 | 160 | if args.format != "torchscript": 161 | return None 162 | if not isinstance(torch_model, (GeneralizedRCNN, RetinaNet)): 163 | return None 164 | 165 | def eval_wrapper(inputs): 166 | """ 167 | The exported model does not contain the final resize step, which is typically 168 | unused in deployment but needed for evaluation. We add it manually here. 169 | """ 170 | input = inputs[0] 171 | instances = traceable_model.outputs_schema(ts_model(input["image"]))[0]["instances"] 172 | postprocessed = detector_postprocess(instances, input["height"], input["width"]) 173 | return [{"instances": postprocessed}] 174 | 175 | return eval_wrapper 176 | 177 | 178 | def get_sample_inputs(args): 179 | 180 | if args.sample_image is None: 181 | # get a first batch from dataset 182 | data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) 183 | first_batch = next(iter(data_loader)) 184 | return first_batch 185 | else: 186 | # get a sample data 187 | original_image = detection_utils.read_image(args.sample_image, format=cfg.INPUT.FORMAT) 188 | # Do same preprocessing as DefaultPredictor 189 | aug = T.ResizeShortestEdge( 190 | [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST 191 | ) 192 | height, width = original_image.shape[:2] 193 | image = aug.get_transform(original_image).apply_image(original_image) 194 | image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) 195 | 196 | inputs = {"image": image, "height": height, "width": width} 197 | 198 | # Sample ready 199 | sample_inputs = [inputs] 200 | return sample_inputs 201 | 202 | 203 | if __name__ == "__main__": 204 | parser = argparse.ArgumentParser(description="Export a model for deployment.") 205 | parser.add_argument( 206 | "--format", 207 | choices=["caffe2", "onnx", "torchscript"], 208 | help="output format", 209 | default="caffe2", 210 | ) 211 | parser.add_argument( 212 | "--export-method", 213 | choices=["caffe2_tracing", "tracing", "scripting"], 214 | help="Method to export models", 215 | default="caffe2_tracing", 216 | ) 217 | parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") 218 | parser.add_argument("--sample-image", default=None, type=str, help="sample image for input") 219 | parser.add_argument("--run-eval", action="store_true") 220 | parser.add_argument("--output", help="output directory for the converted model") 221 | parser.add_argument( 222 | "opts", 223 | help="Modify config options using the command-line", 224 | default=None, 225 | nargs=argparse.REMAINDER, 226 | ) 227 | args = parser.parse_args() 228 | logger = setup_logger() 229 | logger.info("Command line arguments: " + str(args)) 230 | PathManager.mkdirs(args.output) 231 | # Disable respecialization on new shapes. Otherwise --run-eval will be slow 232 | torch._C._jit_set_bailout_depth(1) 233 | 234 | cfg = setup_cfg(args) 235 | 236 | # create a torch model 237 | torch_model = build_model(cfg) 238 | DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS) 239 | torch_model.eval() 240 | 241 | # get sample data 242 | sample_inputs = get_sample_inputs(args) 243 | 244 | # convert and save model 245 | if args.export_method == "caffe2_tracing": 246 | exported_model = export_caffe2_tracing(cfg, torch_model, sample_inputs) 247 | elif args.export_method == "scripting": 248 | exported_model = export_scripting(torch_model) 249 | elif args.export_method == "tracing": 250 | exported_model = export_tracing(torch_model, sample_inputs) 251 | 252 | # run evaluation with the converted model 253 | if args.run_eval: 254 | assert exported_model is not None, ( 255 | "Python inference is not yet implemented for " 256 | f"export_method={args.export_method}, format={args.format}." 257 | ) 258 | logger.info("Running evaluation ... this takes a long time if you export to CPU.") 259 | dataset = cfg.DATASETS.TEST[0] 260 | data_loader = build_detection_test_loader(cfg, dataset) 261 | # NOTE: hard-coded evaluator. change to the evaluator for your dataset 262 | evaluator = COCOEvaluator(dataset, output_dir=args.output) 263 | metrics = inference_on_dataset(exported_model, data_loader, evaluator) 264 | print_csv_format(metrics) 265 | -------------------------------------------------------------------------------- /models/infer_pipeline/config.pbtxt: -------------------------------------------------------------------------------- 1 | platform: "ensemble" 2 | max_batch_size: 0 3 | 4 | input [ 5 | { 6 | name: "IMAGE_BYTES" 7 | data_type: TYPE_STRING 8 | dims: [ 1 ] 9 | } 10 | ] 11 | output [ 12 | { 13 | name: "BBOXES" 14 | data_type: TYPE_FP32 15 | dims: [-1,4] 16 | }, 17 | { 18 | name: "CLASSES" 19 | data_type: TYPE_INT64 20 | dims: [-1] 21 | }, 22 | { 23 | name: "MASKS" 24 | data_type: TYPE_UINT8 25 | dims: [-1, -1, -1] 26 | }, 27 | { 28 | name: "SCORES" 29 | data_type: TYPE_FP32 30 | dims: [-1] 31 | } 32 | ] 33 | 34 | ensemble_scheduling { 35 | step [ 36 | { 37 | model_name: "preprocess" 38 | model_version: 1 39 | input_map: { 40 | key: "image_bytes" 41 | value: "IMAGE_BYTES" 42 | } 43 | output_map: { 44 | key: "preprocessed_image" 45 | value: "PREPROCESSED_IMAGE" 46 | } 47 | }, 48 | { 49 | model_name: "mask_rcnn" 50 | model_version: 1 51 | input_map: { 52 | key: "image__0" 53 | value: "PREPROCESSED_IMAGE" 54 | } 55 | output_map: { 56 | key: "bboxes__0" 57 | value: "RAW_BBOXES" 58 | } 59 | output_map: { 60 | key: "classes__1" 61 | value: "RAW_CLASSES" 62 | } 63 | output_map: { 64 | key: "masks__2" 65 | value: "RAW_MASKS" 66 | } 67 | output_map: { 68 | key: "scores__3" 69 | value: "RAW_SCORES" 70 | } 71 | output_map: { 72 | key: "shape__4" 73 | value: "SHAPE" 74 | } 75 | }, 76 | { 77 | model_name: "postprocess" 78 | model_version: 1 79 | input_map: { 80 | key: "bboxes" 81 | value: "RAW_BBOXES" 82 | } 83 | input_map: { 84 | key: "classes" 85 | value: "RAW_CLASSES" 86 | } 87 | input_map: { 88 | key: "masks" 89 | value: "RAW_MASKS" 90 | } 91 | input_map: { 92 | key: "scores" 93 | value: "RAW_SCORES" 94 | } 95 | input_map: { 96 | key: "shape" 97 | value: "SHAPE" 98 | } 99 | output_map: { 100 | key: "post_bboxes" 101 | value: "BBOXES" 102 | } 103 | output_map: { 104 | key: "post_classes" 105 | value: "CLASSES" 106 | } 107 | output_map: { 108 | key: "post_masks" 109 | value: "MASKS" 110 | } 111 | output_map: { 112 | key: "post_scores" 113 | value: "SCORES" 114 | } 115 | } 116 | ] 117 | } 118 | -------------------------------------------------------------------------------- /models/mask_rcnn/config.pbtxt: -------------------------------------------------------------------------------- 1 | platform: "pytorch_libtorch" 2 | default_model_filename: "model.ts" 3 | max_batch_size: 0 4 | input [ 5 | { 6 | name: "image__0" 7 | data_type: TYPE_UINT8 8 | dims: [3, -1, -1] 9 | } 10 | ] 11 | output [ 12 | { 13 | name: "bboxes__0" 14 | data_type: TYPE_FP32 15 | dims: [-1,4] 16 | }, 17 | { 18 | name: "classes__1" 19 | data_type: TYPE_INT64 20 | dims: [-1] 21 | }, 22 | { 23 | name: "masks__2" 24 | data_type: TYPE_FP32 25 | dims: [-1, 1, -1, -1] 26 | }, 27 | { 28 | name: "scores__3" 29 | data_type: TYPE_FP32 30 | dims: [-1] 31 | }, 32 | { 33 | name: "shape__4" 34 | data_type: TYPE_INT64 35 | dims: [2] 36 | } 37 | ] 38 | -------------------------------------------------------------------------------- /models/postprocess/config.pbtxt: -------------------------------------------------------------------------------- 1 | backend: "python" 2 | 3 | input [ 4 | { 5 | name: "bboxes" 6 | data_type: TYPE_FP32 7 | dims: [-1,4] 8 | }, 9 | { 10 | name: "classes" 11 | data_type: TYPE_INT64 12 | dims: [-1] 13 | }, 14 | { 15 | name: "masks" 16 | data_type: TYPE_FP32 17 | dims: [-1, 1, -1, -1] 18 | }, 19 | { 20 | name: "scores" 21 | data_type: TYPE_FP32 22 | dims: [-1] 23 | }, 24 | { 25 | name: "shape" 26 | data_type: TYPE_INT64 27 | dims: [2] 28 | } 29 | ] 30 | 31 | output [ 32 | { 33 | name: "post_bboxes" 34 | data_type: TYPE_FP32 35 | dims: [-1,4] 36 | }, 37 | { 38 | name: "post_classes" 39 | data_type: TYPE_INT64 40 | dims: [-1] 41 | }, 42 | { 43 | name: "post_masks" 44 | data_type: TYPE_UINT8 45 | dims: [-1, -1, -1] 46 | }, 47 | { 48 | name: "post_scores" 49 | data_type: TYPE_FP32 50 | dims: [-1] 51 | } 52 | ] 53 | 54 | parameters: { 55 | key: "EXECUTION_ENV_PATH", 56 | value: {string_value: "$$TRITON_MODEL_DIRECTORY/pyenv.tar.gz"} 57 | } 58 | -------------------------------------------------------------------------------- /models/preprocess/config.pbtxt: -------------------------------------------------------------------------------- 1 | backend: "python" 2 | 3 | input [ 4 | { 5 | name: "image_bytes" 6 | data_type: TYPE_STRING 7 | dims: [1] 8 | } 9 | ] 10 | 11 | output [ 12 | { 13 | name: "preprocessed_image" 14 | data_type: TYPE_UINT8 15 | dims: [3, -1, -1] 16 | } 17 | ] 18 | 19 | 20 | parameters: { 21 | key: "EXECUTION_ENV_PATH", 22 | value: {string_value: "$$TRITON_MODEL_DIRECTORY/pyenv.tar.gz"} 23 | } 24 | -------------------------------------------------------------------------------- /python_models/postprocess.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | import triton_python_backend_utils as pb_utils 4 | 5 | 6 | class TritonPythonModel: 7 | def get_inputs(self, request): 8 | input_tensor_names = \ 9 | ['bboxes', 'classes', 'masks', 'scores', 'shape'] 10 | inputs = { 11 | tensor_name: pb_utils.get_input_tensor_by_name(request, tensor_name).as_numpy() 12 | for tensor_name in input_tensor_names 13 | } 14 | return inputs 15 | 16 | def paste_mask(self, mask, box, img_h, img_w, threshold): 17 | """ 18 | Paste raw masks with fixed resolution from the mask head to an image 19 | NOTE: You can find the better implementation from: 20 | https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/mask_ops.py 21 | 22 | This method largely based on "paste_mask_in_image_old" from mask_ops.py. 23 | I used it for the sake of simplicity. 24 | 25 | Args: 26 | mask: M x M array where M is the Pooler resolution of your mask head 27 | box: array of shape (4,) 28 | img_h, img_w (int): Image height and width. 29 | threshold (float): Mask binarization threshold in [0, 1]. 30 | Return: 31 | im_mask (Tensor): 32 | The resized and binarized object mask pasted into the original 33 | image plane (a tensor of shape (img_h, img_w)). 34 | """ 35 | box = box.astype(np.int) 36 | # Resize the mask to the size of the bbox 37 | samples_w = box[2] - box[0] + 1 38 | samples_h = box[3] - box[1] + 1 39 | mask = Image.fromarray(mask) 40 | mask = mask.resize((samples_w, samples_h), resample=Image.BILINEAR) 41 | mask = np.array(mask, copy=False) 42 | mask = np.array(mask > threshold, dtype=np.uint8) 43 | 44 | im_mask = np.zeros((img_h, img_w), dtype=np.uint8) 45 | x_0 = max(box[0], 0) 46 | x_1 = min(box[2] + 1, img_w) 47 | y_0 = max(box[1], 0) 48 | y_1 = min(box[3] + 1, img_h) 49 | 50 | im_mask[y_0:y_1, x_0:x_1] = mask[ 51 | (y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0]) 52 | ] 53 | return im_mask 54 | 55 | def postprocess(self, predictions): 56 | img_h, img_w = predictions.pop('shape') 57 | # Filter out predictions with low confidence scores 58 | scores = predictions['scores'] 59 | predictions = {name: tensor[scores > 0.5, ...] for name, tensor in predictions.items()} 60 | # Paste masks to the full image 61 | full_masks = [ 62 | self.paste_mask(mask[0, :, :], box, img_h, img_w, 0.5) 63 | for mask, box in zip(predictions['masks'], predictions['bboxes']) 64 | ] 65 | predictions['masks'] = np.stack(full_masks, axis=0) 66 | return predictions 67 | 68 | def execute(self, requests): 69 | responses = [] 70 | for request in requests: 71 | predictions = self.get_inputs(request) 72 | predictions = self.postprocess(predictions) 73 | # prepare outputs for the reponse 74 | out_tensors = [] 75 | for name in ['bboxes', 'classes', 'scores', 'masks']: 76 | tensor = pb_utils.Tensor('post_' + name, predictions[name]) 77 | out_tensors.append(tensor) 78 | response = pb_utils.InferenceResponse(output_tensors=out_tensors) 79 | responses.append(response) 80 | return responses 81 | -------------------------------------------------------------------------------- /python_models/preprocess.py: -------------------------------------------------------------------------------- 1 | import io 2 | import numpy as np 3 | from PIL import Image 4 | import triton_python_backend_utils as pb_utils 5 | 6 | 7 | class TritonPythonModel: 8 | def execute(self, requests): 9 | responses = [] 10 | for request in requests: 11 | image_bytes = \ 12 | pb_utils.get_input_tensor_by_name(request, "image_bytes").as_numpy()[0] 13 | pil_img = Image.open(io.BytesIO(image_bytes)) 14 | img = np.array(pil_img) 15 | img = np.ascontiguousarray(img.transpose(2, 0, 1)) 16 | out_tensor = pb_utils.Tensor("preprocessed_image", img) 17 | response = pb_utils.InferenceResponse(output_tensors=[out_tensor]) 18 | responses.append(response) 19 | return responses 20 | 21 | -------------------------------------------------------------------------------- /python_models/requirements.txt: -------------------------------------------------------------------------------- 1 | conda-pack==0.6.0 2 | numpy==1.23.1 3 | Pillow==9.2.0 4 | -------------------------------------------------------------------------------- /run_triton.sh: -------------------------------------------------------------------------------- 1 | docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 --shm-size=1gb\ 2 | -v /home/tin/Workspace/triton/deploy-detectron2-with-triton/models:/models \ 3 | nvcr.io/nvidia/tritonserver:22.07-py3 tritonserver --model-repository=/models 4 | --------------------------------------------------------------------------------