├── README.md
├── client.py
├── client_pipeline.py
├── configs
    ├── Base-RCNN-C4.yaml
    ├── Base-RCNN-DilatedC5.yaml
    ├── Base-RCNN-FPN.yaml
    ├── Base-RetinaNet.yaml
    ├── COCO-Detection
    │   ├── fast_rcnn_R_50_FPN_1x.yaml
    │   ├── faster_rcnn_R_101_C4_3x.yaml
    │   ├── faster_rcnn_R_101_DC5_3x.yaml
    │   ├── faster_rcnn_R_101_FPN_3x.yaml
    │   ├── faster_rcnn_R_50_C4_1x.yaml
    │   ├── faster_rcnn_R_50_C4_3x.yaml
    │   ├── faster_rcnn_R_50_DC5_1x.yaml
    │   ├── faster_rcnn_R_50_DC5_3x.yaml
    │   ├── faster_rcnn_R_50_FPN_1x.yaml
    │   ├── faster_rcnn_R_50_FPN_3x.yaml
    │   ├── faster_rcnn_X_101_32x8d_FPN_3x.yaml
    │   ├── fcos_R_50_FPN_1x.py
    │   ├── retinanet_R_101_FPN_3x.yaml
    │   ├── retinanet_R_50_FPN_1x.py
    │   ├── retinanet_R_50_FPN_1x.yaml
    │   ├── retinanet_R_50_FPN_3x.yaml
    │   ├── rpn_R_50_C4_1x.yaml
    │   └── rpn_R_50_FPN_1x.yaml
    ├── COCO-InstanceSegmentation
    │   ├── mask_rcnn_R_101_C4_3x.yaml
    │   ├── mask_rcnn_R_101_DC5_3x.yaml
    │   ├── mask_rcnn_R_101_FPN_3x.yaml
    │   ├── mask_rcnn_R_50_C4_1x.py
    │   ├── mask_rcnn_R_50_C4_1x.yaml
    │   ├── mask_rcnn_R_50_C4_3x.yaml
    │   ├── mask_rcnn_R_50_DC5_1x.yaml
    │   ├── mask_rcnn_R_50_DC5_3x.yaml
    │   ├── mask_rcnn_R_50_FPN_1x.py
    │   ├── mask_rcnn_R_50_FPN_1x.yaml
    │   ├── mask_rcnn_R_50_FPN_1x_giou.yaml
    │   ├── mask_rcnn_R_50_FPN_3x.yaml
    │   ├── mask_rcnn_X_101_32x8d_FPN_3x.yaml
    │   ├── mask_rcnn_regnetx_4gf_dds_fpn_1x.py
    │   └── mask_rcnn_regnety_4gf_dds_fpn_1x.py
    ├── COCO-Keypoints
    │   ├── Base-Keypoint-RCNN-FPN.yaml
    │   ├── keypoint_rcnn_R_101_FPN_3x.yaml
    │   ├── keypoint_rcnn_R_50_FPN_1x.py
    │   ├── keypoint_rcnn_R_50_FPN_1x.yaml
    │   ├── keypoint_rcnn_R_50_FPN_3x.yaml
    │   └── keypoint_rcnn_X_101_32x8d_FPN_3x.yaml
    ├── COCO-PanopticSegmentation
    │   ├── Base-Panoptic-FPN.yaml
    │   ├── panoptic_fpn_R_101_3x.yaml
    │   ├── panoptic_fpn_R_50_1x.py
    │   ├── panoptic_fpn_R_50_1x.yaml
    │   └── panoptic_fpn_R_50_3x.yaml
    ├── Cityscapes
    │   └── mask_rcnn_R_50_FPN.yaml
    ├── Detectron1-Comparisons
    │   ├── README.md
    │   ├── faster_rcnn_R_50_FPN_noaug_1x.yaml
    │   ├── keypoint_rcnn_R_50_FPN_1x.yaml
    │   └── mask_rcnn_R_50_FPN_noaug_1x.yaml
    ├── LVISv0.5-InstanceSegmentation
    │   ├── mask_rcnn_R_101_FPN_1x.yaml
    │   ├── mask_rcnn_R_50_FPN_1x.yaml
    │   └── mask_rcnn_X_101_32x8d_FPN_1x.yaml
    ├── LVISv1-InstanceSegmentation
    │   ├── mask_rcnn_R_101_FPN_1x.yaml
    │   ├── mask_rcnn_R_50_FPN_1x.yaml
    │   └── mask_rcnn_X_101_32x8d_FPN_1x.yaml
    ├── Misc
    │   ├── cascade_mask_rcnn_R_50_FPN_1x.yaml
    │   ├── cascade_mask_rcnn_R_50_FPN_3x.yaml
    │   ├── cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
    │   ├── mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml
    │   ├── mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml
    │   ├── mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml
    │   ├── mask_rcnn_R_50_FPN_3x_gn.yaml
    │   ├── mask_rcnn_R_50_FPN_3x_syncbn.yaml
    │   ├── mmdet_mask_rcnn_R_50_FPN_1x.py
    │   ├── panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml
    │   ├── scratch_mask_rcnn_R_50_FPN_3x_gn.yaml
    │   ├── scratch_mask_rcnn_R_50_FPN_9x_gn.yaml
    │   ├── scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml
    │   ├── semantic_R_50_FPN_1x.yaml
    │   └── torchvision_imagenet_R_50.py
    ├── PascalVOC-Detection
    │   ├── faster_rcnn_R_50_C4.yaml
    │   └── faster_rcnn_R_50_FPN.yaml
    ├── common
    │   ├── README.md
    │   ├── coco_schedule.py
    │   ├── data
    │   │   ├── coco.py
    │   │   ├── coco_keypoint.py
    │   │   └── coco_panoptic_separated.py
    │   ├── models
    │   │   ├── cascade_rcnn.py
    │   │   ├── fcos.py
    │   │   ├── keypoint_rcnn_fpn.py
    │   │   ├── mask_rcnn_c4.py
    │   │   ├── mask_rcnn_fpn.py
    │   │   ├── panoptic_fpn.py
    │   │   └── retinanet.py
    │   ├── optim.py
    │   └── train.py
    ├── new_baselines
    │   ├── mask_rcnn_R_101_FPN_100ep_LSJ.py
    │   ├── mask_rcnn_R_101_FPN_200ep_LSJ.py
    │   ├── mask_rcnn_R_101_FPN_400ep_LSJ.py
    │   ├── mask_rcnn_R_50_FPN_100ep_LSJ.py
    │   ├── mask_rcnn_R_50_FPN_200ep_LSJ.py
    │   ├── mask_rcnn_R_50_FPN_400ep_LSJ.py
    │   ├── mask_rcnn_R_50_FPN_50ep_LSJ.py
    │   ├── mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py
    │   ├── mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py
    │   ├── mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py
    │   ├── mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py
    │   ├── mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py
    │   └── mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py
    └── quick_schedules
    │   ├── README.md
    │   ├── cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml
    │   ├── cascade_mask_rcnn_R_50_FPN_instant_test.yaml
    │   ├── fast_rcnn_R_50_FPN_inference_acc_test.yaml
    │   ├── fast_rcnn_R_50_FPN_instant_test.yaml
    │   ├── keypoint_rcnn_R_50_FPN_inference_acc_test.yaml
    │   ├── keypoint_rcnn_R_50_FPN_instant_test.yaml
    │   ├── keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml
    │   ├── keypoint_rcnn_R_50_FPN_training_acc_test.yaml
    │   ├── mask_rcnn_R_50_C4_GCV_instant_test.yaml
    │   ├── mask_rcnn_R_50_C4_inference_acc_test.yaml
    │   ├── mask_rcnn_R_50_C4_instant_test.yaml
    │   ├── mask_rcnn_R_50_C4_training_acc_test.yaml
    │   ├── mask_rcnn_R_50_DC5_inference_acc_test.yaml
    │   ├── mask_rcnn_R_50_FPN_inference_acc_test.yaml
    │   ├── mask_rcnn_R_50_FPN_instant_test.yaml
    │   ├── mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml
    │   ├── mask_rcnn_R_50_FPN_training_acc_test.yaml
    │   ├── panoptic_fpn_R_50_inference_acc_test.yaml
    │   ├── panoptic_fpn_R_50_instant_test.yaml
    │   ├── panoptic_fpn_R_50_training_acc_test.yaml
    │   ├── retinanet_R_50_FPN_inference_acc_test.yaml
    │   ├── retinanet_R_50_FPN_instant_test.yaml
    │   ├── rpn_R_50_FPN_inference_acc_test.yaml
    │   ├── rpn_R_50_FPN_instant_test.yaml
    │   ├── semantic_R_50_FPN_inference_acc_test.yaml
    │   ├── semantic_R_50_FPN_instant_test.yaml
    │   └── semantic_R_50_FPN_training_acc_test.yaml
├── export_model.py
├── models
    ├── infer_pipeline
    │   └── config.pbtxt
    ├── mask_rcnn
    │   └── config.pbtxt
    ├── postprocess
    │   └── config.pbtxt
    └── preprocess
    │   └── config.pbtxt
├── python_models
    ├── postprocess.py
    ├── preprocess.py
    └── requirements.txt
└── run_triton.sh


/README.md:
--------------------------------------------------------------------------------
1 | # deploy-detectron2-with-triton
2 | This is the accompanying code for the [post](https://tintn.github.io/deploy-detectron2-with-triton/) "Deploy Detectron2 models with Triton".
3 | 


--------------------------------------------------------------------------------
/client.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from concurrent.futures import ThreadPoolExecutor, wait
 3 | import time
 4 | import tritonclient.http as httpclient
 5 | from tqdm import tqdm
 6 | from PIL import Image
 7 | import numpy as np
 8 | 
 9 | 
10 | def test_infer(req_id, image_file, model_name, print_output=False):
11 |     img = np.array(Image.open(image_file))
12 |     img = np.ascontiguousarray(img.transpose(2, 0, 1))
13 |     # Define model's inputs
14 |     inputs = []
15 |     inputs.append(httpclient.InferInput('image__0', img.shape, "UINT8"))
16 |     inputs[0].set_data_from_numpy(img)
17 |     # Define model's outputs
18 |     outputs = []
19 |     outputs.append(httpclient.InferRequestedOutput('bboxes__0'))
20 |     outputs.append(httpclient.InferRequestedOutput('classes__1'))
21 |     outputs.append(httpclient.InferRequestedOutput('masks__2'))
22 |     outputs.append(httpclient.InferRequestedOutput('scores__3'))
23 |     outputs.append(httpclient.InferRequestedOutput('shape__4'))
24 |     # Send request to Triton server
25 |     triton_client = httpclient.InferenceServerClient(
26 |         url="localhost:8000", verbose=False)
27 |     results = triton_client.infer(model_name, inputs=inputs, outputs=outputs)
28 |     response_info = results.get_response()
29 |     outputs = {}
30 |     for output_info in response_info['outputs']:
31 |         output_name = output_info['name']
32 |         outputs[output_name] = results.as_numpy(output_name)
33 | 
34 |     if print_output:
35 |         print(req_id, outputs)
36 | 
37 | 
38 | def parse_args():
39 |     parser = argparse.ArgumentParser()
40 |     parser.add_argument('--image', required=True)
41 |     parser.add_argument('--model', required=True)
42 |     parser.add_argument('--mode', default='sequential', choices=['sequential', 'concurrent'])
43 |     parser.add_argument('--num-reqs', default='1')
44 |     parser.add_argument('--print-output', action='store_true')
45 |     return parser.parse_args()
46 | 
47 | 
48 | if __name__ == '__main__':
49 |     args = parse_args()
50 |     image_file = args.image
51 |     model_name = args.model
52 |     mode = args.mode
53 |     n_reqs = int(args.num_reqs)
54 | 
55 |     if mode == 'sequential':
56 |         for i in tqdm(range(n_reqs)):
57 |             test_infer(i, image_file, model_name, args.print_output)
58 |     elif mode == 'concurrent':
59 |         s = time.time()
60 |         with ThreadPoolExecutor(max_workers=10) as executor:
61 |             futures = [
62 |                 executor.submit(test_infer,
63 |                                 i,
64 |                                 image_file,
65 |                                 model_name,
66 |                                 args.print_output)
67 |                 for i in range(n_reqs)
68 |             ]
69 |             wait(futures)
70 |             for f in futures:
71 |                 f.results()
72 |         e = time.time()
73 |         print('FPS:', n_reqs/(e - s))
74 | 


--------------------------------------------------------------------------------
/client_pipeline.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from concurrent.futures import ThreadPoolExecutor, wait
 3 | import time
 4 | import tritonclient.http as httpclient
 5 | from tqdm import tqdm
 6 | from PIL import Image
 7 | import numpy as np
 8 | 
 9 | 
10 | def test_infer(req_id, image_file, model_name, print_output=False):
11 |     with open(image_file, 'rb') as fi:
12 |         image_bytes = fi.read()
13 |     image_bytes = np.array([image_bytes], dtype=np.bytes_)
14 |     # Define model's inputs
15 |     inputs = []
16 |     inputs.append(httpclient.InferInput('IMAGE_BYTES', image_bytes.shape, "BYTES"))
17 |     inputs[0].set_data_from_numpy(image_bytes)
18 |     # Define model's outputs
19 |     outputs = []
20 |     outputs.append(httpclient.InferRequestedOutput('BBOXES'))
21 |     outputs.append(httpclient.InferRequestedOutput('CLASSES'))
22 |     outputs.append(httpclient.InferRequestedOutput('MASKS'))
23 |     outputs.append(httpclient.InferRequestedOutput('SCORES'))
24 |     # Send request to Triton server
25 |     triton_client = httpclient.InferenceServerClient(
26 |         url="localhost:8000", verbose=False)
27 |     results = triton_client.infer(model_name, inputs=inputs, outputs=outputs)
28 |     response_info = results.get_response()
29 |     outputs = {}
30 |     for output_info in response_info['outputs']:
31 |         output_name = output_info['name']
32 |         outputs[output_name] = results.as_numpy(output_name)
33 | 
34 |     if print_output:
35 |         print(req_id, outputs)
36 | 
37 | 
38 | def parse_args():
39 |     parser = argparse.ArgumentParser()
40 |     parser.add_argument('--image', required=True)
41 |     parser.add_argument('--model', required=True)
42 |     parser.add_argument('--mode', default='sequential', choices=['sequential', 'concurrent'])
43 |     parser.add_argument('--num-reqs', default='1')
44 |     parser.add_argument('--print-output', action='store_true')
45 |     return parser.parse_args()
46 | 
47 | 
48 | if __name__ == '__main__':
49 |     args = parse_args()
50 |     image_file = args.image
51 |     model_name = args.model
52 |     mode = args.mode
53 |     n_reqs = int(args.num_reqs)
54 | 
55 |     if mode == 'sequential':
56 |         for i in tqdm(range(n_reqs)):
57 |             test_infer(i, image_file, model_name, args.print_output)
58 |     elif mode == 'concurrent':
59 |         s = time.time()
60 |         with ThreadPoolExecutor(max_workers=10) as executor:
61 |             futures = [
62 |                 executor.submit(test_infer,
63 |                                 i,
64 |                                 image_file,
65 |                                 model_name,
66 |                                 args.print_output)
67 |                 for i in range(n_reqs)
68 |             ]
69 |             wait(futures)
70 |             for f in futures:
71 |                 f.results()
72 |         e = time.time()
73 |         print(n_reqs/(e - s))
74 | 


--------------------------------------------------------------------------------
/configs/Base-RCNN-C4.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   RPN:
 4 |     PRE_NMS_TOPK_TEST: 6000
 5 |     POST_NMS_TOPK_TEST: 1000
 6 |   ROI_HEADS:
 7 |     NAME: "Res5ROIHeads"
 8 | DATASETS:
 9 |   TRAIN: ("coco_2017_train",)
10 |   TEST: ("coco_2017_val",)
11 | SOLVER:
12 |   IMS_PER_BATCH: 16
13 |   BASE_LR: 0.02
14 |   STEPS: (60000, 80000)
15 |   MAX_ITER: 90000
16 | INPUT:
17 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
18 | VERSION: 2
19 | 


--------------------------------------------------------------------------------
/configs/Base-RCNN-DilatedC5.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   RESNETS:
 4 |     OUT_FEATURES: ["res5"]
 5 |     RES5_DILATION: 2
 6 |   RPN:
 7 |     IN_FEATURES: ["res5"]
 8 |     PRE_NMS_TOPK_TEST: 6000
 9 |     POST_NMS_TOPK_TEST: 1000
10 |   ROI_HEADS:
11 |     NAME: "StandardROIHeads"
12 |     IN_FEATURES: ["res5"]
13 |   ROI_BOX_HEAD:
14 |     NAME: "FastRCNNConvFCHead"
15 |     NUM_FC: 2
16 |     POOLER_RESOLUTION: 7
17 |   ROI_MASK_HEAD:
18 |     NAME: "MaskRCNNConvUpsampleHead"
19 |     NUM_CONV: 4
20 |     POOLER_RESOLUTION: 14
21 | DATASETS:
22 |   TRAIN: ("coco_2017_train",)
23 |   TEST: ("coco_2017_val",)
24 | SOLVER:
25 |   IMS_PER_BATCH: 16
26 |   BASE_LR: 0.02
27 |   STEPS: (60000, 80000)
28 |   MAX_ITER: 90000
29 | INPUT:
30 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
31 | VERSION: 2
32 | 


--------------------------------------------------------------------------------
/configs/Base-RCNN-FPN.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     NAME: "build_resnet_fpn_backbone"
 5 |   RESNETS:
 6 |     OUT_FEATURES: ["res2", "res3", "res4", "res5"]
 7 |   FPN:
 8 |     IN_FEATURES: ["res2", "res3", "res4", "res5"]
 9 |   ANCHOR_GENERATOR:
10 |     SIZES: [[32], [64], [128], [256], [512]]  # One size for each input feature map
11 |     ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (shared by all input feature maps)
12 |   RPN:
13 |     IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
14 |     PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
15 |     PRE_NMS_TOPK_TEST: 1000  # Per FPN level
16 |     # Detectron1 uses 2000 proposals per-batch,
17 |     # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
18 |     # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19 |     POST_NMS_TOPK_TRAIN: 1000
20 |     POST_NMS_TOPK_TEST: 1000
21 |   ROI_HEADS:
22 |     NAME: "StandardROIHeads"
23 |     IN_FEATURES: ["p2", "p3", "p4", "p5"]
24 |   ROI_BOX_HEAD:
25 |     NAME: "FastRCNNConvFCHead"
26 |     NUM_FC: 2
27 |     POOLER_RESOLUTION: 7
28 |   ROI_MASK_HEAD:
29 |     NAME: "MaskRCNNConvUpsampleHead"
30 |     NUM_CONV: 4
31 |     POOLER_RESOLUTION: 14
32 | DATASETS:
33 |   TRAIN: ("coco_2017_train",)
34 |   TEST: ("coco_2017_val",)
35 | SOLVER:
36 |   IMS_PER_BATCH: 16
37 |   BASE_LR: 0.02
38 |   STEPS: (60000, 80000)
39 |   MAX_ITER: 90000
40 | INPUT:
41 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
42 | VERSION: 2
43 | 


--------------------------------------------------------------------------------
/configs/Base-RetinaNet.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "RetinaNet"
 3 |   BACKBONE:
 4 |     NAME: "build_retinanet_resnet_fpn_backbone"
 5 |   RESNETS:
 6 |     OUT_FEATURES: ["res3", "res4", "res5"]
 7 |   ANCHOR_GENERATOR:
 8 |     SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
 9 |   FPN:
10 |     IN_FEATURES: ["res3", "res4", "res5"]
11 |   RETINANET:
12 |     IOU_THRESHOLDS: [0.4, 0.5]
13 |     IOU_LABELS: [0, -1, 1]
14 |     SMOOTH_L1_LOSS_BETA: 0.0
15 | DATASETS:
16 |   TRAIN: ("coco_2017_train",)
17 |   TEST: ("coco_2017_val",)
18 | SOLVER:
19 |   IMS_PER_BATCH: 16
20 |   BASE_LR: 0.01  # Note that RetinaNet uses a different default learning rate
21 |   STEPS: (60000, 80000)
22 |   MAX_ITER: 90000
23 | INPUT:
24 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
25 | VERSION: 2
26 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   LOAD_PROPOSALS: True
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   PROPOSAL_GENERATOR:
 9 |     NAME: "PrecomputedProposals"
10 | DATASETS:
11 |   TRAIN: ("coco_2017_train",)
12 |   PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", )
13 |   TEST: ("coco_2017_val",)
14 |   PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
15 | DATALOADER:
16 |   # proposals are part of the dataset_dicts, and take a lot of RAM
17 |   NUM_WORKERS: 2
18 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-C4.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: False
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: False
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: False
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   MASK_ON: False
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 5 |   PIXEL_STD: [57.375, 57.120, 58.395]
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False  # this is a C2 model
 8 |     NUM_GROUPS: 32
 9 |     WIDTH_PER_GROUP: 8
10 |     DEPTH: 101
11 | SOLVER:
12 |   STEPS: (210000, 250000)
13 |   MAX_ITER: 270000
14 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/fcos_R_50_FPN_1x.py:
--------------------------------------------------------------------------------
 1 | from ..common.optim import SGD as optimizer
 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
 3 | from ..common.data.coco import dataloader
 4 | from ..common.models.fcos import model
 5 | from ..common.train import train
 6 | 
 7 | dataloader.train.mapper.use_instance_mask = False  # detection config: switch the train mapper's instance-mask flag off
 8 | optimizer.lr = 0.01  # override the learning rate of the imported SGD config
 9 | 
10 | model.backbone.bottom_up.freeze_at = 2  # presumably freezes the early backbone stages - confirm freeze_at semantics
11 | train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"  # start from the ImageNet-pretrained R-50 weights
12 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 |   RESNETS:
5 |     DEPTH: 101
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/retinanet_R_50_FPN_1x.py:
--------------------------------------------------------------------------------
 1 | from ..common.optim import SGD as optimizer
 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
 3 | from ..common.data.coco import dataloader
 4 | from ..common.models.retinanet import model
 5 | from ..common.train import train
 6 | 
 7 | dataloader.train.mapper.use_instance_mask = False  # detection config: switch the train mapper's instance-mask flag off
 8 | model.backbone.bottom_up.freeze_at = 2  # presumably freezes the early backbone stages - confirm freeze_at semantics
 9 | optimizer.lr = 0.01  # override the learning rate of the imported SGD config
10 | 
11 | train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"  # start from the ImageNet-pretrained R-50 weights
12 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/rpn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "ProposalNetwork"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   RPN:
 9 |     PRE_NMS_TOPK_TEST: 12000
10 |     POST_NMS_TOPK_TEST: 2000
11 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "ProposalNetwork"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   RPN:
 9 |     POST_NMS_TOPK_TEST: 2000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py:
--------------------------------------------------------------------------------
1 | from ..common.train import train
2 | from ..common.optim import SGD as optimizer
3 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
4 | from ..common.data.coco import dataloader
5 | from ..common.models.mask_rcnn_c4 import model
6 | 
7 | model.backbone.freeze_at = 2  # presumably freezes the early backbone stages - confirm freeze_at semantics
8 | train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"  # start from the ImageNet-pretrained R-50 weights
9 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-C4.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: True
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: True
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py:
--------------------------------------------------------------------------------
1 | from ..common.optim import SGD as optimizer
2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
3 | from ..common.data.coco import dataloader
4 | from ..common.models.mask_rcnn_fpn import model
5 | from ..common.train import train
6 | 
7 | model.backbone.bottom_up.freeze_at = 2  # presumably freezes the early backbone stages - confirm freeze_at semantics
8 | train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"  # start from the ImageNet-pretrained R-50 weights
9 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: True
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   RPN:
 8 |     BBOX_REG_LOSS_TYPE: "giou"
 9 |     BBOX_REG_LOSS_WEIGHT: 2.0
10 |   ROI_BOX_HEAD:
11 |     BBOX_REG_LOSS_TYPE: "giou"
12 |     BBOX_REG_LOSS_WEIGHT: 10.0
13 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   MASK_ON: True
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 5 |   PIXEL_STD: [57.375, 57.120, 58.395]
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False  # this is a C2 model
 8 |     NUM_GROUPS: 32
 9 |     WIDTH_PER_GROUP: 8
10 |     DEPTH: 101
11 | SOLVER:
12 |   STEPS: (210000, 250000)
13 |   MAX_ITER: 270000
14 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py:
--------------------------------------------------------------------------------
 1 | from ..common.optim import SGD as optimizer
 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
 3 | from ..common.data.coco import dataloader
 4 | from ..common.models.mask_rcnn_fpn import model
 5 | from ..common.train import train
 6 | 
 7 | from detectron2.config import LazyCall as L
 8 | from detectron2.modeling.backbone import RegNet
 9 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
10 | 
11 | 
12 | # Replace default ResNet with RegNetX-4GF from the DDS paper. Config source:
13 | # https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml#L4-L9  # noqa
14 | model.backbone.bottom_up = L(RegNet)(
15 |     stem_class=SimpleStem,
16 |     stem_width=32,
17 |     block_class=ResBottleneckBlock,
18 |     depth=23,
19 |     w_a=38.65,
20 |     w_0=96,
21 |     w_m=2.43,
22 |     group_width=40,
23 |     freeze_at=2,
24 |     norm="FrozenBN",
25 |     out_features=["s1", "s2", "s3", "s4"],
26 | )
27 | model.pixel_std = [57.375, 57.120, 58.395]
28 | 
29 | optimizer.weight_decay = 5e-5
30 | train.init_checkpoint = (
31 |     "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth"
32 | )
33 | # RegNets benefit from enabling cudnn benchmark mode
34 | train.cudnn_benchmark = True
35 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py:
--------------------------------------------------------------------------------
 1 | from ..common.optim import SGD as optimizer
 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
 3 | from ..common.data.coco import dataloader
 4 | from ..common.models.mask_rcnn_fpn import model
 5 | from ..common.train import train
 6 | 
 7 | from detectron2.config import LazyCall as L
 8 | from detectron2.modeling.backbone import RegNet
 9 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
10 | 
11 | 
12 | # Replace default ResNet with RegNetY-4GF from the DDS paper. Config source:
13 | # https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml#L4-L10  # noqa
14 | model.backbone.bottom_up = L(RegNet)(
15 |     stem_class=SimpleStem,
16 |     stem_width=32,
17 |     block_class=ResBottleneckBlock,
18 |     depth=22,
19 |     w_a=31.41,
20 |     w_0=96,
21 |     w_m=2.24,
22 |     group_width=64,
23 |     se_ratio=0.25,
24 |     freeze_at=2,
25 |     norm="FrozenBN",
26 |     out_features=["s1", "s2", "s3", "s4"],
27 | )
28 | model.pixel_std = [57.375, 57.120, 58.395]
29 | 
30 | optimizer.weight_decay = 5e-5
31 | train.init_checkpoint = (
32 |     "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth"
33 | )
34 | # RegNets benefit from enabling cudnn benchmark mode
35 | train.cudnn_benchmark = True
36 | 


--------------------------------------------------------------------------------
/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   KEYPOINT_ON: True
 4 |   ROI_HEADS:
 5 |     NUM_CLASSES: 1
 6 |   ROI_BOX_HEAD:
 7 |     SMOOTH_L1_BETA: 0.5  # Keypoint AP degrades (though box AP improves) when using plain L1 loss
 8 |   RPN:
 9 |     # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
10 |     # 1000 proposals per-image is found to hurt box AP.
11 |     # Therefore we increase it to 1500 per-image.
12 |     POST_NMS_TOPK_TRAIN: 1500
13 | DATASETS:
14 |   TRAIN: ("keypoints_coco_2017_train",)
15 |   TEST: ("keypoints_coco_2017_val",)
16 | 


--------------------------------------------------------------------------------
/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 |   RESNETS:
5 |     DEPTH: 101
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py:
--------------------------------------------------------------------------------
1 | from ..common.optim import SGD as optimizer
2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
3 | from ..common.data.coco_keypoint import dataloader
4 | from ..common.models.keypoint_rcnn_fpn import model
5 | from ..common.train import train
6 | 
7 | model.backbone.bottom_up.freeze_at = 2
8 | train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
9 | 


--------------------------------------------------------------------------------
/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | 


--------------------------------------------------------------------------------
/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 4 |   PIXEL_STD: [57.375, 57.120, 58.395]
 5 |   RESNETS:
 6 |     STRIDE_IN_1X1: False  # this is a C2 model
 7 |     NUM_GROUPS: 32
 8 |     WIDTH_PER_GROUP: 8
 9 |     DEPTH: 101
10 | SOLVER:
11 |   STEPS: (210000, 250000)
12 |   MAX_ITER: 270000
13 | 


--------------------------------------------------------------------------------
/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "PanopticFPN"
 4 |   MASK_ON: True
 5 |   SEM_SEG_HEAD:
 6 |     LOSS_WEIGHT: 0.5
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_train_panoptic_separated",)
 9 |   TEST: ("coco_2017_val_panoptic_separated",)
10 | DATALOADER:
11 |   FILTER_EMPTY_ANNOTATIONS: False
12 | 


--------------------------------------------------------------------------------
/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Panoptic-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 |   RESNETS:
5 |     DEPTH: 101
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py:
--------------------------------------------------------------------------------
1 | from ..common.optim import SGD as optimizer
2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
3 | from ..common.data.coco_panoptic_separated import dataloader
4 | from ..common.models.panoptic_fpn import model
5 | from ..common.train import train
6 | 
7 | model.backbone.bottom_up.freeze_at = 2
8 | train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
9 | 


--------------------------------------------------------------------------------
/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Panoptic-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | 


--------------------------------------------------------------------------------
/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Panoptic-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   # For better, more stable performance initialize from COCO
 5 |   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
 6 |   MASK_ON: True
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 8
 9 | # This is similar to the setting used in Mask R-CNN paper, Appendix A
10 | # But there are some differences, e.g., we did not initialize the output
11 | # layer using the corresponding classes from COCO
12 | INPUT:
13 |   MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
14 |   MIN_SIZE_TRAIN_SAMPLING: "choice"
15 |   MIN_SIZE_TEST: 1024
16 |   MAX_SIZE_TRAIN: 2048
17 |   MAX_SIZE_TEST: 2048
18 | DATASETS:
19 |   TRAIN: ("cityscapes_fine_instance_seg_train",)
20 |   TEST: ("cityscapes_fine_instance_seg_val",)
21 | SOLVER:
22 |   BASE_LR: 0.01
23 |   STEPS: (18000,)
24 |   MAX_ITER: 24000
25 |   IMS_PER_BATCH: 8
26 | TEST:
27 |   EVAL_PERIOD: 8000
28 | 


--------------------------------------------------------------------------------
/configs/Detectron1-Comparisons/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron.
 3 | 
 4 | The differences in implementation details are shared in
 5 | [Compatibility with Other Libraries](../../docs/notes/compatibility.md).
 6 | 
 7 | The differences in model zoo's experimental settings include:
 8 | * Use scale augmentation during training. This improves AP with lower training cost.
 9 | * Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may
10 |   affect other AP.
11 | * Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP.
12 | * Use `ROIAlignV2`. This does not significantly affect AP.
13 | 
14 | In this directory, we provide a few configs that __do not__ have the above changes.
15 | They mimic Detectron's behavior as closely as possible,
16 | and provide a fair comparison of accuracy and speed against Detectron.
17 | 
18 | <!--
19 | ./gen_html_table.py --config 'Detectron1-Comparisons/*.yaml' --name "Faster R-CNN" "Keypoint R-CNN" "Mask R-CNN" --fields lr_sched train_speed inference_speed mem box_AP mask_AP keypoint_AP --base-dir ../../../configs/Detectron1-Comparisons
20 | -->
21 | 
22 | 
23 | <table><tbody>
24 | <!-- START TABLE -->
25 | <!-- TABLE HEADER -->
26 | <th valign="bottom">Name</th>
27 | <th valign="bottom">lr<br/>sched</th>
28 | <th valign="bottom">train<br/>time<br/>(s/iter)</th>
29 | <th valign="bottom">inference<br/>time<br/>(s/im)</th>
30 | <th valign="bottom">train<br/>mem<br/>(GB)</th>
31 | <th valign="bottom">box<br/>AP</th>
32 | <th valign="bottom">mask<br/>AP</th>
33 | <th valign="bottom">kp.<br/>AP</th>
34 | <th valign="bottom">model id</th>
35 | <th valign="bottom">download</th>
36 | <!-- TABLE BODY -->
37 | <!-- ROW: faster_rcnn_R_50_FPN_noaug_1x -->
38 |  <tr><td align="left"><a href="faster_rcnn_R_50_FPN_noaug_1x.yaml">Faster R-CNN</a></td>
39 | <td align="center">1x</td>
40 | <td align="center">0.219</td>
41 | <td align="center">0.038</td>
42 | <td align="center">3.1</td>
43 | <td align="center">36.9</td>
44 | <td align="center"></td>
45 | <td align="center"></td>
46 | <td align="center">137781054</td>
47 | <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/model_final_7ab50c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/metrics.json">metrics</a></td>
48 | </tr>
49 | <!-- ROW: keypoint_rcnn_R_50_FPN_1x -->
50 |  <tr><td align="left"><a href="keypoint_rcnn_R_50_FPN_1x.yaml">Keypoint R-CNN</a></td>
51 | <td align="center">1x</td>
52 | <td align="center">0.313</td>
53 | <td align="center">0.071</td>
54 | <td align="center">5.0</td>
55 | <td align="center">53.1</td>
56 | <td align="center"></td>
57 | <td align="center">64.2</td>
58 | <td align="center">137781195</td>
59 | <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/model_final_cce136.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/metrics.json">metrics</a></td>
60 | </tr>
61 | <!-- ROW: mask_rcnn_R_50_FPN_noaug_1x -->
62 |  <tr><td align="left"><a href="mask_rcnn_R_50_FPN_noaug_1x.yaml">Mask R-CNN</a></td>
63 | <td align="center">1x</td>
64 | <td align="center">0.273</td>
65 | <td align="center">0.043</td>
66 | <td align="center">3.4</td>
67 | <td align="center">37.8</td>
68 | <td align="center">34.9</td>
69 | <td align="center"></td>
70 | <td align="center">137781281</td>
71 | <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/model_final_62ca52.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/metrics.json">metrics</a></td>
72 | </tr>
73 | </tbody></table>
74 | 
75 | ## Comparisons:
76 | 
77 | * Faster R-CNN: Detectron's AP is 36.7, similar to ours.
78 | * Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron
79 |   [bug](https://github.com/facebookresearch/Detectron/issues/459) leads to a drop in box AP, which can be
80 |   compensated for by some parameter tuning.
81 | * Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to a more correct implementation.
82 |   See [this article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) for details.
83 | 
84 | For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html).
85 | 


--------------------------------------------------------------------------------
/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   # Detectron1 uses smooth L1 loss with some magic beta values.
 8 |   # The defaults are changed to L1 loss in Detectron2.
 9 |   RPN:
10 |     SMOOTH_L1_BETA: 0.1111
11 |   ROI_BOX_HEAD:
12 |     SMOOTH_L1_BETA: 1.0
13 |     POOLER_SAMPLING_RATIO: 2
14 |     POOLER_TYPE: "ROIAlign"
15 | INPUT:
16 |   # no scale augmentation
17 |   MIN_SIZE_TRAIN: (800, )
18 | 


--------------------------------------------------------------------------------
/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   KEYPOINT_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1
 9 |   ROI_KEYPOINT_HEAD:
10 |     POOLER_RESOLUTION: 14
11 |     POOLER_SAMPLING_RATIO: 2
12 |     POOLER_TYPE: "ROIAlign"
13 |   # Detectron1 uses smooth L1 loss with some magic beta values.
14 |   # The defaults are changed to L1 loss in Detectron2.
15 |   ROI_BOX_HEAD:
16 |     SMOOTH_L1_BETA: 1.0
17 |     POOLER_SAMPLING_RATIO: 2
18 |     POOLER_TYPE: "ROIAlign"
19 |   RPN:
20 |     SMOOTH_L1_BETA: 0.1111
21 |     # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
22 |     # 1000 proposals per-image is found to hurt box AP.
23 |     # Therefore we increase it to 1500 per-image.
24 |     POST_NMS_TOPK_TRAIN: 1500
25 | DATASETS:
26 |   TRAIN: ("keypoints_coco_2017_train",)
27 |   TEST: ("keypoints_coco_2017_val",)
28 | 


--------------------------------------------------------------------------------
/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   # Detectron1 uses smooth L1 loss with some magic beta values.
 8 |   # The defaults are changed to L1 loss in Detectron2.
 9 |   RPN:
10 |     SMOOTH_L1_BETA: 0.1111
11 |   ROI_BOX_HEAD:
12 |     SMOOTH_L1_BETA: 1.0
13 |     POOLER_SAMPLING_RATIO: 2
14 |     POOLER_TYPE: "ROIAlign"
15 |   ROI_MASK_HEAD:
16 |     POOLER_SAMPLING_RATIO: 2
17 |     POOLER_TYPE: "ROIAlign"
18 | INPUT:
19 |   # no scale augmentation
20 |   MIN_SIZE_TRAIN: (800, )
21 | 


--------------------------------------------------------------------------------
/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1230
 9 |     SCORE_THRESH_TEST: 0.0001
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | DATASETS:
13 |   TRAIN: ("lvis_v0.5_train",)
14 |   TEST: ("lvis_v0.5_val",)
15 | TEST:
16 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
17 | DATALOADER:
18 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
19 |   REPEAT_THRESHOLD: 0.001
20 | 


--------------------------------------------------------------------------------
/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1230
 9 |     SCORE_THRESH_TEST: 0.0001
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | DATASETS:
13 |   TRAIN: ("lvis_v0.5_train",)
14 |   TEST: ("lvis_v0.5_val",)
15 | TEST:
16 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
17 | DATALOADER:
18 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
19 |   REPEAT_THRESHOLD: 0.001
20 | 


--------------------------------------------------------------------------------
/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 4 |   PIXEL_STD: [57.375, 57.120, 58.395]
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False  # this is a C2 model
 8 |     NUM_GROUPS: 32
 9 |     WIDTH_PER_GROUP: 8
10 |     DEPTH: 101
11 |   ROI_HEADS:
12 |     NUM_CLASSES: 1230
13 |     SCORE_THRESH_TEST: 0.0001
14 | INPUT:
15 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
16 | DATASETS:
17 |   TRAIN: ("lvis_v0.5_train",)
18 |   TEST: ("lvis_v0.5_val",)
19 | TEST:
20 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
21 | DATALOADER:
22 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
23 |   REPEAT_THRESHOLD: 0.001
24 | 


--------------------------------------------------------------------------------
/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1203
 9 |     SCORE_THRESH_TEST: 0.0001
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | DATASETS:
13 |   TRAIN: ("lvis_v1_train",)
14 |   TEST: ("lvis_v1_val",)
15 | TEST:
16 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
17 | SOLVER:
18 |   STEPS: (120000, 160000)
19 |   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
20 | DATALOADER:
21 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
22 |   REPEAT_THRESHOLD: 0.001
23 | 


--------------------------------------------------------------------------------
/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1203
 9 |     SCORE_THRESH_TEST: 0.0001
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | DATASETS:
13 |   TRAIN: ("lvis_v1_train",)
14 |   TEST: ("lvis_v1_val",)
15 | TEST:
16 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
17 | SOLVER:
18 |   STEPS: (120000, 160000)
19 |   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
20 | DATALOADER:
21 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
22 |   REPEAT_THRESHOLD: 0.001
23 | 


--------------------------------------------------------------------------------
/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 4 |   PIXEL_STD: [57.375, 57.120, 58.395]
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False  # this is a C2 model
 8 |     NUM_GROUPS: 32
 9 |     WIDTH_PER_GROUP: 8
10 |     DEPTH: 101
11 |   ROI_HEADS:
12 |     NUM_CLASSES: 1203
13 |     SCORE_THRESH_TEST: 0.0001
14 | INPUT:
15 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
16 | DATASETS:
17 |   TRAIN: ("lvis_v1_train",)
18 |   TEST: ("lvis_v1_val",)
19 | SOLVER:
20 |   STEPS: (120000, 160000)
21 |   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
22 | TEST:
23 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
24 | DATALOADER:
25 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
26 |   REPEAT_THRESHOLD: 0.001
27 | 


--------------------------------------------------------------------------------
/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NAME: CascadeROIHeads
 9 |   ROI_BOX_HEAD:
10 |     CLS_AGNOSTIC_BBOX_REG: True
11 |   RPN:
12 |     POST_NMS_TOPK_TRAIN: 2000
13 | 


--------------------------------------------------------------------------------
/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NAME: CascadeROIHeads
 9 |   ROI_BOX_HEAD:
10 |     CLS_AGNOSTIC_BBOX_REG: True
11 |   RPN:
12 |     POST_NMS_TOPK_TRAIN: 2000
13 | SOLVER:
14 |   STEPS: (210000, 250000)
15 |   MAX_ITER: 270000
16 | 


--------------------------------------------------------------------------------
/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   MASK_ON: True
 4 |   WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
 5 |   RESNETS:
 6 |     STRIDE_IN_1X1: False  # this is a C2 model
 7 |     NUM_GROUPS: 32
 8 |     WIDTH_PER_GROUP: 8
 9 |     DEPTH: 152
10 |     DEFORM_ON_PER_STAGE: [False, True, True, True]
11 |   ROI_HEADS:
12 |     NAME: "CascadeROIHeads"
13 |   ROI_BOX_HEAD:
14 |     NAME: "FastRCNNConvFCHead"
15 |     NUM_CONV: 4
16 |     NUM_FC: 1
17 |     NORM: "GN"
18 |     CLS_AGNOSTIC_BBOX_REG: True
19 |   ROI_MASK_HEAD:
20 |     NUM_CONV: 8
21 |     NORM: "GN"
22 |   RPN:
23 |     POST_NMS_TOPK_TRAIN: 2000
24 | SOLVER:
25 |   IMS_PER_BATCH: 128
26 |   STEPS: (35000, 45000)
27 |   MAX_ITER: 50000
28 |   BASE_LR: 0.16
29 | INPUT:
30 |   MIN_SIZE_TRAIN: (640, 864)
31 |   MIN_SIZE_TRAIN_SAMPLING: "range"
32 |   MAX_SIZE_TRAIN: 1440
33 |   CROP:
34 |     ENABLED: True
35 | TEST:
36 |   EVAL_PERIOD: 2500
37 | 


--------------------------------------------------------------------------------
/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_BOX_HEAD:
 8 |     CLS_AGNOSTIC_BBOX_REG: True
 9 |   ROI_MASK_HEAD:
10 |     CLS_AGNOSTIC_MASK: True
11 | 


--------------------------------------------------------------------------------
/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: True
5 |   RESNETS:
6 |     DEPTH: 50
7 |     DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
8 |     DEFORM_MODULATED: False
9 | 


--------------------------------------------------------------------------------
/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |     DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
 8 |     DEFORM_MODULATED: False
 9 | SOLVER:
10 |   STEPS: (210000, 250000)
11 |   MAX_ITER: 270000
12 | 


--------------------------------------------------------------------------------
/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |     NORM: "GN"
 8 |     STRIDE_IN_1X1: False
 9 |   FPN:
10 |     NORM: "GN"
11 |   ROI_BOX_HEAD:
12 |     NAME: "FastRCNNConvFCHead"
13 |     NUM_CONV: 4
14 |     NUM_FC: 1
15 |     NORM: "GN"
16 |   ROI_MASK_HEAD:
17 |     NORM: "GN"
18 | SOLVER:
19 |   # 3x schedule
20 |   STEPS: (210000, 250000)
21 |   MAX_ITER: 270000
22 | 


--------------------------------------------------------------------------------
/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |     NORM: "SyncBN"
 8 |     STRIDE_IN_1X1: True
 9 |   FPN:
10 |     NORM: "SyncBN"
11 |   ROI_BOX_HEAD:
12 |     NAME: "FastRCNNConvFCHead"
13 |     NUM_CONV: 4
14 |     NUM_FC: 1
15 |     NORM: "SyncBN"
16 |   ROI_MASK_HEAD:
17 |     NORM: "SyncBN"
18 | SOLVER:
19 |   # 3x schedule
20 |   STEPS: (210000, 250000)
21 |   MAX_ITER: 270000
22 | TEST:
23 |   PRECISE_BN:
24 |     ENABLED: True
25 | 


--------------------------------------------------------------------------------
/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py:
--------------------------------------------------------------------------------
  1 | # An example config to train a mmdetection model using detectron2.
  2 | 
  3 | from ..common.data.coco import dataloader
  4 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
  5 | from ..common.optim import SGD as optimizer
  6 | from ..common.train import train
  7 | 
  8 | from detectron2.modeling.mmdet_wrapper import MMDetDetector
  9 | from detectron2.config import LazyCall as L
 10 | 
 11 | model = L(MMDetDetector)(
 12 |     detector=dict(
 13 |         type="MaskRCNN",
 14 |         pretrained="torchvision://resnet50",
 15 |         backbone=dict(
 16 |             type="ResNet",
 17 |             depth=50,
 18 |             num_stages=4,
 19 |             out_indices=(0, 1, 2, 3),
 20 |             frozen_stages=1,
 21 |             norm_cfg=dict(type="BN", requires_grad=True),
 22 |             norm_eval=True,
 23 |             style="pytorch",
 24 |         ),
 25 |         neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
 26 |         rpn_head=dict(
 27 |             type="RPNHead",
 28 |             in_channels=256,
 29 |             feat_channels=256,
 30 |             anchor_generator=dict(
 31 |                 type="AnchorGenerator",
 32 |                 scales=[8],
 33 |                 ratios=[0.5, 1.0, 2.0],
 34 |                 strides=[4, 8, 16, 32, 64],
 35 |             ),
 36 |             bbox_coder=dict(
 37 |                 type="DeltaXYWHBBoxCoder",
 38 |                 target_means=[0.0, 0.0, 0.0, 0.0],
 39 |                 target_stds=[1.0, 1.0, 1.0, 1.0],
 40 |             ),
 41 |             loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
 42 |             loss_bbox=dict(type="L1Loss", loss_weight=1.0),
 43 |         ),
 44 |         roi_head=dict(
 45 |             type="StandardRoIHead",
 46 |             bbox_roi_extractor=dict(
 47 |                 type="SingleRoIExtractor",
 48 |                 roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
 49 |                 out_channels=256,
 50 |                 featmap_strides=[4, 8, 16, 32],
 51 |             ),
 52 |             bbox_head=dict(
 53 |                 type="Shared2FCBBoxHead",
 54 |                 in_channels=256,
 55 |                 fc_out_channels=1024,
 56 |                 roi_feat_size=7,
 57 |                 num_classes=80,
 58 |                 bbox_coder=dict(
 59 |                     type="DeltaXYWHBBoxCoder",
 60 |                     target_means=[0.0, 0.0, 0.0, 0.0],
 61 |                     target_stds=[0.1, 0.1, 0.2, 0.2],
 62 |                 ),
 63 |                 reg_class_agnostic=False,
 64 |                 loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
 65 |                 loss_bbox=dict(type="L1Loss", loss_weight=1.0),
 66 |             ),
 67 |             mask_roi_extractor=dict(
 68 |                 type="SingleRoIExtractor",
 69 |                 roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0),
 70 |                 out_channels=256,
 71 |                 featmap_strides=[4, 8, 16, 32],
 72 |             ),
 73 |             mask_head=dict(
 74 |                 type="FCNMaskHead",
 75 |                 num_convs=4,
 76 |                 in_channels=256,
 77 |                 conv_out_channels=256,
 78 |                 num_classes=80,
 79 |                 loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0),
 80 |             ),
 81 |         ),
 82 |         # model training and testing settings
 83 |         train_cfg=dict(
 84 |             rpn=dict(
 85 |                 assigner=dict(
 86 |                     type="MaxIoUAssigner",
 87 |                     pos_iou_thr=0.7,
 88 |                     neg_iou_thr=0.3,
 89 |                     min_pos_iou=0.3,
 90 |                     match_low_quality=True,
 91 |                     ignore_iof_thr=-1,
 92 |                 ),
 93 |                 sampler=dict(
 94 |                     type="RandomSampler",
 95 |                     num=256,
 96 |                     pos_fraction=0.5,
 97 |                     neg_pos_ub=-1,
 98 |                     add_gt_as_proposals=False,
 99 |                 ),
100 |                 allowed_border=-1,
101 |                 pos_weight=-1,
102 |                 debug=False,
103 |             ),
104 |             rpn_proposal=dict(
105 |                 nms_pre=2000,
106 |                 max_per_img=1000,
107 |                 nms=dict(type="nms", iou_threshold=0.7),
108 |                 min_bbox_size=0,
109 |             ),
110 |             rcnn=dict(
111 |                 assigner=dict(
112 |                     type="MaxIoUAssigner",
113 |                     pos_iou_thr=0.5,
114 |                     neg_iou_thr=0.5,
115 |                     min_pos_iou=0.5,
116 |                     match_low_quality=True,
117 |                     ignore_iof_thr=-1,
118 |                 ),
119 |                 sampler=dict(
120 |                     type="RandomSampler",
121 |                     num=512,
122 |                     pos_fraction=0.25,
123 |                     neg_pos_ub=-1,
124 |                     add_gt_as_proposals=True,
125 |                 ),
126 |                 mask_size=28,
127 |                 pos_weight=-1,
128 |                 debug=False,
129 |             ),
130 |         ),
131 |         test_cfg=dict(
132 |             rpn=dict(
133 |                 nms_pre=1000,
134 |                 max_per_img=1000,
135 |                 nms=dict(type="nms", iou_threshold=0.7),
136 |                 min_bbox_size=0,
137 |             ),
138 |             rcnn=dict(
139 |                 score_thr=0.05,
140 |                 nms=dict(type="nms", iou_threshold=0.5),
141 |                 max_per_img=100,
142 |                 mask_thr_binary=0.5,
143 |             ),
144 |         ),
145 |     ),
146 |     pixel_mean=[123.675, 116.280, 103.530],
147 |     pixel_std=[58.395, 57.120, 57.375],
148 | )
149 | 
150 | dataloader.train.mapper.image_format = "RGB"  # torchvision pretrained model
151 | train.init_checkpoint = None  # pretrained model is loaded inside backbone
152 | 


--------------------------------------------------------------------------------
/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml:
--------------------------------------------------------------------------------
 1 | # A large PanopticFPN for demo purposes.
 2 | # Use GN on backbone to support semantic seg.
 3 | # Use Cascade + Deform Conv to improve localization.
 4 | _BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml"
 5 | MODEL:
 6 |   WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN"
 7 |   RESNETS:
 8 |     DEPTH: 101
 9 |     NORM: "GN"
10 |     DEFORM_ON_PER_STAGE: [False, True, True, True]
11 |     STRIDE_IN_1X1: False
12 |   FPN:
13 |     NORM: "GN"
14 |   ROI_HEADS:
15 |     NAME: CascadeROIHeads
16 |   ROI_BOX_HEAD:
17 |     CLS_AGNOSTIC_BBOX_REG: True
18 |   ROI_MASK_HEAD:
19 |     NORM: "GN"
20 |   RPN:
21 |     POST_NMS_TOPK_TRAIN: 2000
22 | SOLVER:
23 |   STEPS: (105000, 125000)
24 |   MAX_ITER: 135000
25 |   IMS_PER_BATCH: 32
26 |   BASE_LR: 0.04
27 | 


--------------------------------------------------------------------------------
/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
 2 | MODEL:
 3 |   # Train from random initialization.
 4 |   WEIGHTS: ""
 5 |   # It makes sense to divide by STD when training from scratch
 6 |   # But it seems to make no difference on the results and C2's models didn't do this.
 7 |   # So we keep things consistent with C2.
 8 |   # PIXEL_STD: [57.375, 57.12, 58.395]
 9 |   MASK_ON: True
10 |   BACKBONE:
11 |     FREEZE_AT: 0
12 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
13 | # to learn what you need for training from scratch.
14 | 


--------------------------------------------------------------------------------
/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
 2 | MODEL:
 3 |   PIXEL_STD: [57.375, 57.12, 58.395]
 4 |   WEIGHTS: ""
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False
 8 |   BACKBONE:
 9 |     FREEZE_AT: 0
10 | SOLVER:
11 |   # 9x schedule
12 |   IMS_PER_BATCH: 64  # 4x the standard
13 |   STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
14 |   MAX_ITER: 202500   # 90k * 9 / 4
15 |   BASE_LR: 0.08
16 | TEST:
17 |   EVAL_PERIOD: 2500
18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
19 | # to learn what you need for training from scratch.
20 | 


--------------------------------------------------------------------------------
/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml"
 2 | MODEL:
 3 |   PIXEL_STD: [57.375, 57.12, 58.395]
 4 |   WEIGHTS: ""
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False
 8 |   BACKBONE:
 9 |     FREEZE_AT: 0
10 | SOLVER:
11 |   # 9x schedule
12 |   IMS_PER_BATCH: 64  # 4x the standard
13 |   STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
14 |   MAX_ITER: 202500   # 90k * 9 / 4
15 |   BASE_LR: 0.08
16 | TEST:
17 |   EVAL_PERIOD: 2500
18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
19 | # to learn what you need for training from scratch.
20 | 


--------------------------------------------------------------------------------
/configs/Misc/semantic_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SemanticSegmentor"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_train_panoptic_stuffonly",)
 9 |   TEST: ("coco_2017_val_panoptic_stuffonly",)
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | 


--------------------------------------------------------------------------------
/configs/Misc/torchvision_imagenet_R_50.py:
--------------------------------------------------------------------------------
  1 | """
  2 | An example config file to train an ImageNet classifier with detectron2.
  3 | Model and dataloader both come from torchvision.
  4 | This shows how to use detectron2 as a general engine for any new models and tasks.
  5 | 
  6 | To run, use the following command:
  7 | 
  8 | python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \
  9 |     --num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/
 10 | 
 11 | """
 12 | 
 13 | 
 14 | import torch
 15 | from torch import nn
 16 | from torch.nn import functional as F
 17 | from omegaconf import OmegaConf
 18 | import torchvision
 19 | from torchvision.transforms import transforms as T
 20 | from torchvision.models.resnet import ResNet, Bottleneck
 21 | from fvcore.common.param_scheduler import MultiStepParamScheduler
 22 | 
 23 | from detectron2.solver import WarmupParamScheduler
 24 | from detectron2.solver.build import get_default_optimizer_params
 25 | from detectron2.config import LazyCall as L
 26 | from detectron2.model_zoo import get_config
 27 | from detectron2.data.samplers import TrainingSampler, InferenceSampler
 28 | from detectron2.evaluation import DatasetEvaluator
 29 | from detectron2.utils import comm
 30 | 
 31 | 
 32 | """
 33 | Note: Here we put reusable code (models, evaluation, data) together with configs just as a
 34 | proof-of-concept, to easily demonstrate what's needed to train an ImageNet classifier in detectron2.
 35 | Writing code in configs offers extreme flexibility but is often not a good engineering practice.
 36 | In practice, you might want to put code in your project and import them instead.
 37 | """
 38 | 
 39 | 
 40 | def build_data_loader(dataset, batch_size, num_workers, training=True):
 41 |     return torch.utils.data.DataLoader(
 42 |         dataset,
 43 |         sampler=(TrainingSampler if training else InferenceSampler)(len(dataset)),
 44 |         batch_size=batch_size,
 45 |         num_workers=num_workers,
 46 |         pin_memory=True,
 47 |     )
 48 | 
 49 | 
 50 | class ClassificationNet(nn.Module):
 51 |     def __init__(self, model: nn.Module):
 52 |         super().__init__()
 53 |         self.model = model
 54 | 
 55 |     @property
 56 |     def device(self):
 57 |         return list(self.model.parameters())[0].device
 58 | 
 59 |     def forward(self, inputs):
 60 |         image, label = inputs
 61 |         pred = self.model(image.to(self.device))
 62 |         if self.training:
 63 |             label = label.to(self.device)
 64 |             return F.cross_entropy(pred, label)
 65 |         else:
 66 |             return pred
 67 | 
 68 | 
 69 | class ClassificationAcc(DatasetEvaluator):
 70 |     def reset(self):
 71 |         self.corr = self.total = 0
 72 | 
 73 |     def process(self, inputs, outputs):
 74 |         image, label = inputs
 75 |         self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item()
 76 |         self.total += len(label)
 77 | 
 78 |     def evaluate(self):
 79 |         all_corr_total = comm.all_gather([self.corr, self.total])
 80 |         corr = sum(x[0] for x in all_corr_total)
 81 |         total = sum(x[1] for x in all_corr_total)
 82 |         return {"accuracy": corr / total}
 83 | 
 84 | 
 85 | # --- End of code that could be in a project and be imported
 86 | 
 87 | 
 88 | dataloader = OmegaConf.create()
 89 | dataloader.train = L(build_data_loader)(
 90 |     dataset=L(torchvision.datasets.ImageNet)(
 91 |         root="/path/to/imagenet",
 92 |         split="train",
 93 |         transform=L(T.Compose)(
 94 |             transforms=[
 95 |                 L(T.RandomResizedCrop)(size=224),
 96 |                 L(T.RandomHorizontalFlip)(),
 97 |                 T.ToTensor(),
 98 |                 L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
 99 |             ]
100 |         ),
101 |     ),
102 |     batch_size=256 // 8,
103 |     num_workers=4,
104 |     training=True,
105 | )
106 | 
107 | dataloader.test = L(build_data_loader)(
108 |     dataset=L(torchvision.datasets.ImageNet)(
109 |         root="${...train.dataset.root}",
110 |         split="val",
111 |         transform=L(T.Compose)(
112 |             transforms=[
113 |                 L(T.Resize)(size=256),
114 |                 L(T.CenterCrop)(size=224),
115 |                 T.ToTensor(),
116 |                 L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
117 |             ]
118 |         ),
119 |     ),
120 |     batch_size=256 // 8,
121 |     num_workers=4,
122 |     training=False,
123 | )
124 | 
125 | dataloader.evaluator = L(ClassificationAcc)()
126 | 
127 | model = L(ClassificationNet)(
128 |     model=(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True)
129 | )
130 | 
131 | 
132 | optimizer = L(torch.optim.SGD)(
133 |     params=L(get_default_optimizer_params)(),
134 |     lr=0.1,
135 |     momentum=0.9,
136 |     weight_decay=1e-4,
137 | )
138 | 
139 | lr_multiplier = L(WarmupParamScheduler)(
140 |     scheduler=L(MultiStepParamScheduler)(
141 |         values=[1.0, 0.1, 0.01, 0.001], milestones=[30, 60, 90, 100]
142 |     ),
143 |     warmup_length=1 / 100,
144 |     warmup_factor=0.1,
145 | )
146 | 
147 | 
148 | train = get_config("common/train.py").train
149 | train.init_checkpoint = None
150 | train.max_iter = 100 * 1281167 // 256
151 | 


--------------------------------------------------------------------------------
/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 20
 9 | INPUT:
10 |   MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
11 |   MIN_SIZE_TEST: 800
12 | DATASETS:
13 |   TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
14 |   TEST: ('voc_2007_test',)
15 | SOLVER:
16 |   STEPS: (12000, 16000)
17 |   MAX_ITER: 18000  # 17.4 epochs
18 |   WARMUP_ITERS: 100
19 | 


--------------------------------------------------------------------------------
/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 20
 9 | INPUT:
10 |   MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
11 |   MIN_SIZE_TEST: 800
12 | DATASETS:
13 |   TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
14 |   TEST: ('voc_2007_test',)
15 | SOLVER:
16 |   STEPS: (12000, 16000)
17 |   MAX_ITER: 18000  # 17.4 epochs
18 |   WARMUP_ITERS: 100
19 | 


--------------------------------------------------------------------------------
/configs/common/README.md:
--------------------------------------------------------------------------------
1 | This directory provides definitions for a few common models, dataloaders, schedulers,
2 | and optimizers that are often used in training.
3 | The definitions of these objects are provided in the form of lazy instantiation:
4 | their arguments can be edited by users before constructing the objects.
5 | 
6 | They can be imported, or loaded by the `model_zoo.get_config` API in users' own configs.
7 | 


--------------------------------------------------------------------------------
/configs/common/coco_schedule.py:
--------------------------------------------------------------------------------
 1 | from fvcore.common.param_scheduler import MultiStepParamScheduler
 2 | 
 3 | from detectron2.config import LazyCall as L
 4 | from detectron2.solver import WarmupParamScheduler
 5 | 
 6 | 
 7 | def default_X_scheduler(num_X):
 8 |     """
 9 |     Returns the config for a default multi-step LR scheduler such as "1x", "3x",
10 |     commonly referred to in papers, where every 1x has the total length of 1440k
11 |     training images (~12 COCO epochs). LR is decayed twice at the end of training
12 |     following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4.
13 | 
14 |     Args:
15 |         num_X: a positive real number
16 | 
17 |     Returns:
18 |         DictConfig: configs that define the multiplier for LR during training
19 |     """
20 |     # total number of iterations assuming 16 batch size, using 1440000/16=90000
21 |     total_steps_16bs = num_X * 90000
22 | 
23 |     if num_X <= 2:
24 |         scheduler = L(MultiStepParamScheduler)(
25 |             values=[1.0, 0.1, 0.01],
26 |             # note that scheduler is scale-invariant. This is equivalent to
27 |             # milestones=[6, 8, 9]
28 |             milestones=[60000, 80000, 90000],
29 |         )
30 |     else:
31 |         scheduler = L(MultiStepParamScheduler)(
32 |             values=[1.0, 0.1, 0.01],
33 |             milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs],
34 |         )
35 |     return L(WarmupParamScheduler)(
36 |         scheduler=scheduler,
37 |         warmup_length=1000 / total_steps_16bs,
38 |         warmup_method="linear",
39 |         warmup_factor=0.001,
40 |     )
41 | 
42 | 
43 | lr_multiplier_1x = default_X_scheduler(1)
44 | lr_multiplier_2x = default_X_scheduler(2)
45 | lr_multiplier_3x = default_X_scheduler(3)
46 | lr_multiplier_6x = default_X_scheduler(6)
47 | lr_multiplier_9x = default_X_scheduler(9)
48 | 


--------------------------------------------------------------------------------
/configs/common/data/coco.py:
--------------------------------------------------------------------------------
 1 | from omegaconf import OmegaConf
 2 | 
 3 | import detectron2.data.transforms as T
 4 | from detectron2.config import LazyCall as L
 5 | from detectron2.data import (
 6 |     DatasetMapper,
 7 |     build_detection_test_loader,
 8 |     build_detection_train_loader,
 9 |     get_detection_dataset_dicts,
10 | )
11 | from detectron2.evaluation import COCOEvaluator
12 | 
13 | dataloader = OmegaConf.create()
14 | 
15 | dataloader.train = L(build_detection_train_loader)(
16 |     dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"),
17 |     mapper=L(DatasetMapper)(
18 |         is_train=True,
19 |         augmentations=[
20 |             L(T.ResizeShortestEdge)(
21 |                 short_edge_length=(640, 672, 704, 736, 768, 800),
22 |                 sample_style="choice",
23 |                 max_size=1333,
24 |             ),
25 |             L(T.RandomFlip)(horizontal=True),
26 |         ],
27 |         image_format="BGR",
28 |         use_instance_mask=True,
29 |     ),
30 |     total_batch_size=16,
31 |     num_workers=4,
32 | )
33 | 
34 | dataloader.test = L(build_detection_test_loader)(
35 |     dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False),
36 |     mapper=L(DatasetMapper)(
37 |         is_train=False,
38 |         augmentations=[
39 |             L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333),
40 |         ],
41 |         image_format="${...train.mapper.image_format}",
42 |     ),
43 |     num_workers=4,
44 | )
45 | 
46 | dataloader.evaluator = L(COCOEvaluator)(
47 |     dataset_name="${..test.dataset.names}",
48 | )
49 | 


--------------------------------------------------------------------------------
/configs/common/data/coco_keypoint.py:
--------------------------------------------------------------------------------
 1 | from detectron2.data.detection_utils import create_keypoint_hflip_indices
 2 | 
 3 | from .coco import dataloader
 4 | 
 5 | dataloader.train.dataset.min_keypoints = 1
 6 | dataloader.train.dataset.names = "keypoints_coco_2017_train"
 7 | dataloader.test.dataset.names = "keypoints_coco_2017_val"
 8 | 
 9 | dataloader.train.mapper.update(
10 |     use_instance_mask=False,
11 |     use_keypoint=True,
12 |     keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names),
13 | )
14 | 


--------------------------------------------------------------------------------
/configs/common/data/coco_panoptic_separated.py:
--------------------------------------------------------------------------------
 1 | from detectron2.config import LazyCall as L
 2 | from detectron2.evaluation import (
 3 |     COCOEvaluator,
 4 |     COCOPanopticEvaluator,
 5 |     DatasetEvaluators,
 6 |     SemSegEvaluator,
 7 | )
 8 | 
 9 | from .coco import dataloader
10 | 
11 | dataloader.train.dataset.names = "coco_2017_train_panoptic_separated"
12 | dataloader.train.dataset.filter_empty = False
13 | dataloader.test.dataset.names = "coco_2017_val_panoptic_separated"
14 | 
15 | 
16 | dataloader.evaluator = [
17 |     L(COCOEvaluator)(
18 |         dataset_name="${...test.dataset.names}",
19 |     ),
20 |     L(SemSegEvaluator)(
21 |         dataset_name="${...test.dataset.names}",
22 |     ),
23 |     L(COCOPanopticEvaluator)(
24 |         dataset_name="${...test.dataset.names}",
25 |     ),
26 | ]
27 | 


--------------------------------------------------------------------------------
/configs/common/models/cascade_rcnn.py:
--------------------------------------------------------------------------------
 1 | from detectron2.config import LazyCall as L
 2 | from detectron2.layers import ShapeSpec
 3 | from detectron2.modeling.box_regression import Box2BoxTransform
 4 | from detectron2.modeling.matcher import Matcher
 5 | from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads
 6 | 
 7 | from .mask_rcnn_fpn import model
 8 | 
 9 | # arguments that don't exist for Cascade R-CNN
10 | [model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]]
11 | 
12 | model.roi_heads.update(
13 |     _target_=CascadeROIHeads,
14 |     box_heads=[
15 |         L(FastRCNNConvFCHead)(
16 |             input_shape=ShapeSpec(channels=256, height=7, width=7),
17 |             conv_dims=[],
18 |             fc_dims=[1024, 1024],
19 |         )
20 |         for k in range(3)
21 |     ],
22 |     box_predictors=[
23 |         L(FastRCNNOutputLayers)(
24 |             input_shape=ShapeSpec(channels=1024),
25 |             test_score_thresh=0.05,
26 |             box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)),
27 |             cls_agnostic_bbox_reg=True,
28 |             num_classes="${...num_classes}",
29 |         )
30 |         for (w1, w2) in [(10, 5), (20, 10), (30, 15)]
31 |     ],
32 |     proposal_matchers=[
33 |         L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False)
34 |         for th in [0.5, 0.6, 0.7]
35 |     ],
36 | )
37 | 


--------------------------------------------------------------------------------
/configs/common/models/fcos.py:
--------------------------------------------------------------------------------
 1 | from detectron2.modeling.meta_arch.fcos import FCOS, FCOSHead
 2 | 
 3 | from .retinanet import model
 4 | 
 5 | model._target_ = FCOS
 6 | 
 7 | del model.anchor_generator
 8 | del model.box2box_transform
 9 | del model.anchor_matcher
10 | del model.input_format
11 | 
12 | # Use P5 instead of C5 to compute P6/P7
13 | # (Sec 2.2 of https://arxiv.org/abs/2006.09214)
14 | model.backbone.top_block.in_feature = "p5"
15 | model.backbone.top_block.in_channels = 256
16 | 
17 | # New score threshold determined based on sqrt(cls_score * centerness)
18 | model.test_score_thresh = 0.2
19 | model.test_nms_thresh = 0.6
20 | 
21 | model.head._target_ = FCOSHead
22 | del model.head.num_anchors
23 | model.head.norm = "GN"
24 | 


--------------------------------------------------------------------------------
/configs/common/models/keypoint_rcnn_fpn.py:
--------------------------------------------------------------------------------
 1 | from detectron2.config import LazyCall as L
 2 | from detectron2.layers import ShapeSpec
 3 | from detectron2.modeling.poolers import ROIPooler
 4 | from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead
 5 | 
 6 | from .mask_rcnn_fpn import model
 7 | 
 8 | [model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]]
 9 | 
10 | model.roi_heads.update(
11 |     num_classes=1,
12 |     keypoint_in_features=["p2", "p3", "p4", "p5"],
13 |     keypoint_pooler=L(ROIPooler)(
14 |         output_size=14,
15 |         scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
16 |         sampling_ratio=0,
17 |         pooler_type="ROIAlignV2",
18 |     ),
19 |     keypoint_head=L(KRCNNConvDeconvUpsampleHead)(
20 |         input_shape=ShapeSpec(channels=256, width=14, height=14),
21 |         num_keypoints=17,
22 |         conv_dims=[512] * 8,
23 |         loss_normalizer="visible",
24 |     ),
25 | )
26 | 
27 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
28 | # 1000 proposals per-image is found to hurt box AP.
29 | # Therefore we increase it to 1500 per-image.
30 | model.proposal_generator.post_nms_topk = (1500, 1000)
31 | 
32 | # Keypoint AP degrades (though box AP improves) when using plain L1 loss
33 | model.roi_heads.box_predictor.smooth_l1_beta = 0.5
34 | 


--------------------------------------------------------------------------------
/configs/common/models/mask_rcnn_c4.py:
--------------------------------------------------------------------------------
 1 | from detectron2.config import LazyCall as L
 2 | from detectron2.layers import ShapeSpec
 3 | from detectron2.modeling.meta_arch import GeneralizedRCNN
 4 | from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
 5 | from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet
 6 | from detectron2.modeling.box_regression import Box2BoxTransform
 7 | from detectron2.modeling.matcher import Matcher
 8 | from detectron2.modeling.poolers import ROIPooler
 9 | from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
10 | from detectron2.modeling.roi_heads import (
11 |     FastRCNNOutputLayers,
12 |     MaskRCNNConvUpsampleHead,
13 |     Res5ROIHeads,
14 | )
15 | 
16 | model = L(GeneralizedRCNN)(
17 |     backbone=L(ResNet)(
18 |         stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
19 |         stages=L(ResNet.make_default_stages)(
20 |             depth=50,
21 |             stride_in_1x1=True,
22 |             norm="FrozenBN",
23 |         ),
24 |         out_features=["res4"],
25 |     ),
26 |     proposal_generator=L(RPN)(
27 |         in_features=["res4"],
28 |         head=L(StandardRPNHead)(in_channels=1024, num_anchors=15),
29 |         anchor_generator=L(DefaultAnchorGenerator)(
30 |             sizes=[[32, 64, 128, 256, 512]],
31 |             aspect_ratios=[0.5, 1.0, 2.0],
32 |             strides=[16],
33 |             offset=0.0,
34 |         ),
35 |         anchor_matcher=L(Matcher)(
36 |             thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
37 |         ),
38 |         box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
39 |         batch_size_per_image=256,
40 |         positive_fraction=0.5,
41 |         pre_nms_topk=(12000, 6000),
42 |         post_nms_topk=(2000, 1000),
43 |         nms_thresh=0.7,
44 |     ),
45 |     roi_heads=L(Res5ROIHeads)(
46 |         num_classes=80,
47 |         batch_size_per_image=512,
48 |         positive_fraction=0.25,
49 |         proposal_matcher=L(Matcher)(
50 |             thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
51 |         ),
52 |         in_features=["res4"],
53 |         pooler=L(ROIPooler)(
54 |             output_size=14,
55 |             scales=(1.0 / 16,),
56 |             sampling_ratio=0,
57 |             pooler_type="ROIAlignV2",
58 |         ),
59 |         res5=L(ResNet.make_stage)(
60 |             block_class=BottleneckBlock,
61 |             num_blocks=3,
62 |             stride_per_block=[2, 1, 1],
63 |             in_channels=1024,
64 |             bottleneck_channels=512,
65 |             out_channels=2048,
66 |             norm="FrozenBN",
67 |             stride_in_1x1=True,
68 |         ),
69 |         box_predictor=L(FastRCNNOutputLayers)(
70 |             input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1),
71 |             test_score_thresh=0.05,
72 |             box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
73 |             num_classes="${..num_classes}",
74 |         ),
75 |         mask_head=L(MaskRCNNConvUpsampleHead)(
76 |             input_shape=L(ShapeSpec)(
77 |                 channels="${...res5.out_channels}",
78 |                 width="${...pooler.output_size}",
79 |                 height="${...pooler.output_size}",
80 |             ),
81 |             num_classes="${..num_classes}",
82 |             conv_dims=[256],
83 |         ),
84 |     ),
85 |     pixel_mean=[103.530, 116.280, 123.675],
86 |     pixel_std=[1.0, 1.0, 1.0],
87 |     input_format="BGR",
88 | )
89 | 


--------------------------------------------------------------------------------
/configs/common/models/mask_rcnn_fpn.py:
--------------------------------------------------------------------------------
 1 | from detectron2.config import LazyCall as L
 2 | from detectron2.layers import ShapeSpec
 3 | from detectron2.modeling.meta_arch import GeneralizedRCNN
 4 | from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
 5 | from detectron2.modeling.backbone.fpn import LastLevelMaxPool
 6 | from detectron2.modeling.backbone import BasicStem, FPN, ResNet
 7 | from detectron2.modeling.box_regression import Box2BoxTransform
 8 | from detectron2.modeling.matcher import Matcher
 9 | from detectron2.modeling.poolers import ROIPooler
10 | from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
11 | from detectron2.modeling.roi_heads import (
12 |     StandardROIHeads,
13 |     FastRCNNOutputLayers,
14 |     MaskRCNNConvUpsampleHead,
15 |     FastRCNNConvFCHead,
16 | )
17 | 
18 | model = L(GeneralizedRCNN)(
19 |     backbone=L(FPN)(
20 |         bottom_up=L(ResNet)(
21 |             stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
22 |             stages=L(ResNet.make_default_stages)(
23 |                 depth=50,
24 |                 stride_in_1x1=True,
25 |                 norm="FrozenBN",
26 |             ),
27 |             out_features=["res2", "res3", "res4", "res5"],
28 |         ),
29 |         in_features="${.bottom_up.out_features}",
30 |         out_channels=256,
31 |         top_block=L(LastLevelMaxPool)(),
32 |     ),
33 |     proposal_generator=L(RPN)(
34 |         in_features=["p2", "p3", "p4", "p5", "p6"],
35 |         head=L(StandardRPNHead)(in_channels=256, num_anchors=3),
36 |         anchor_generator=L(DefaultAnchorGenerator)(
37 |             sizes=[[32], [64], [128], [256], [512]],
38 |             aspect_ratios=[0.5, 1.0, 2.0],
39 |             strides=[4, 8, 16, 32, 64],
40 |             offset=0.0,
41 |         ),
42 |         anchor_matcher=L(Matcher)(
43 |             thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
44 |         ),
45 |         box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
46 |         batch_size_per_image=256,
47 |         positive_fraction=0.5,
48 |         pre_nms_topk=(2000, 1000),
49 |         post_nms_topk=(1000, 1000),
50 |         nms_thresh=0.7,
51 |     ),
52 |     roi_heads=L(StandardROIHeads)(
53 |         num_classes=80,
54 |         batch_size_per_image=512,
55 |         positive_fraction=0.25,
56 |         proposal_matcher=L(Matcher)(
57 |             thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
58 |         ),
59 |         box_in_features=["p2", "p3", "p4", "p5"],
60 |         box_pooler=L(ROIPooler)(
61 |             output_size=7,
62 |             scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
63 |             sampling_ratio=0,
64 |             pooler_type="ROIAlignV2",
65 |         ),
66 |         box_head=L(FastRCNNConvFCHead)(
67 |             input_shape=ShapeSpec(channels=256, height=7, width=7),
68 |             conv_dims=[],
69 |             fc_dims=[1024, 1024],
70 |         ),
71 |         box_predictor=L(FastRCNNOutputLayers)(
72 |             input_shape=ShapeSpec(channels=1024),
73 |             test_score_thresh=0.05,
74 |             box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
75 |             num_classes="${..num_classes}",
76 |         ),
77 |         mask_in_features=["p2", "p3", "p4", "p5"],
78 |         mask_pooler=L(ROIPooler)(
79 |             output_size=14,
80 |             scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
81 |             sampling_ratio=0,
82 |             pooler_type="ROIAlignV2",
83 |         ),
84 |         mask_head=L(MaskRCNNConvUpsampleHead)(
85 |             input_shape=ShapeSpec(channels=256, width=14, height=14),
86 |             num_classes="${..num_classes}",
87 |             conv_dims=[256, 256, 256, 256, 256],
88 |         ),
89 |     ),
90 |     pixel_mean=[103.530, 116.280, 123.675],
91 |     pixel_std=[1.0, 1.0, 1.0],
92 |     input_format="BGR",
93 | )
94 | 


--------------------------------------------------------------------------------
/configs/common/models/panoptic_fpn.py:
--------------------------------------------------------------------------------
 1 | from detectron2.config import LazyCall as L
 2 | from detectron2.layers import ShapeSpec
 3 | from detectron2.modeling import PanopticFPN
 4 | from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead
 5 | 
 6 | from .mask_rcnn_fpn import model
 7 | 
 8 | model._target_ = PanopticFPN
 9 | model.sem_seg_head = L(SemSegFPNHead)(
10 |     input_shape={
11 |         f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}")
12 |         for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32])
13 |     },
14 |     ignore_value=255,
15 |     num_classes=54,  # COCO stuff + 1
16 |     conv_dims=128,
17 |     common_stride=4,
18 |     loss_weight=0.5,
19 |     norm="GN",
20 | )
21 | 


--------------------------------------------------------------------------------
/configs/common/models/retinanet.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from detectron2.config import LazyCall as L
 4 | from detectron2.layers import ShapeSpec
 5 | from detectron2.modeling.meta_arch import RetinaNet
 6 | from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
 7 | from detectron2.modeling.backbone.fpn import LastLevelP6P7
 8 | from detectron2.modeling.backbone import BasicStem, FPN, ResNet
 9 | from detectron2.modeling.box_regression import Box2BoxTransform
10 | from detectron2.modeling.matcher import Matcher
11 | from detectron2.modeling.meta_arch.retinanet import RetinaNetHead
12 | 
13 | model = L(RetinaNet)(
14 |     backbone=L(FPN)(
15 |         bottom_up=L(ResNet)(
16 |             stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
17 |             stages=L(ResNet.make_default_stages)(
18 |                 depth=50,
19 |                 stride_in_1x1=True,
20 |                 norm="FrozenBN",
21 |             ),
22 |             out_features=["res3", "res4", "res5"],
23 |         ),
24 |         in_features=["res3", "res4", "res5"],
25 |         out_channels=256,
26 |         top_block=L(LastLevelP6P7)(in_channels=2048, out_channels="${..out_channels}"),
27 |     ),
28 |     head=L(RetinaNetHead)(
29 |         # Shape for each input feature map
30 |         input_shape=[ShapeSpec(channels=256)] * 5,
31 |         num_classes="${..num_classes}",
32 |         conv_dims=[256, 256, 256, 256],
33 |         prior_prob=0.01,
34 |         num_anchors=9,
35 |     ),
36 |     anchor_generator=L(DefaultAnchorGenerator)(
37 |         sizes=[[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]],
38 |         aspect_ratios=[0.5, 1.0, 2.0],
39 |         strides=[8, 16, 32, 64, 128],
40 |         offset=0.0,
41 |     ),
42 |     box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
43 |     anchor_matcher=L(Matcher)(
44 |         thresholds=[0.4, 0.5], labels=[0, -1, 1], allow_low_quality_matches=True
45 |     ),
46 |     num_classes=80,
47 |     head_in_features=["p3", "p4", "p5", "p6", "p7"],
48 |     focal_loss_alpha=0.25,
49 |     focal_loss_gamma=2.0,
50 |     pixel_mean=[103.530, 116.280, 123.675],
51 |     pixel_std=[1.0, 1.0, 1.0],
52 |     input_format="BGR",
53 | )
54 | 


--------------------------------------------------------------------------------
/configs/common/optim.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from detectron2.config import LazyCall as L
 4 | from detectron2.solver.build import get_default_optimizer_params
 5 | 
 6 | SGD = L(torch.optim.SGD)(
 7 |     params=L(get_default_optimizer_params)(
 8 |         # params.model is meant to be set to the model object, before instantiating
 9 |         # the optimizer.
10 |         weight_decay_norm=0.0
11 |     ),
12 |     lr=0.02,
13 |     momentum=0.9,
14 |     weight_decay=1e-4,
15 | )
16 | 


--------------------------------------------------------------------------------
/configs/common/train.py:
--------------------------------------------------------------------------------
 1 | # Common training-related configs that are designed for "tools/lazyconfig_train_net.py"
 2 | # You can use your own instead, together with your own train_net.py
 3 | train = dict(
 4 |     output_dir="./output",
 5 |     init_checkpoint="",
 6 |     max_iter=90000,
 7 |     amp=dict(enabled=False),  # options for Automatic Mixed Precision
 8 |     ddp=dict(  # options for DistributedDataParallel
 9 |         broadcast_buffers=False,
10 |         find_unused_parameters=False,
11 |         fp16_compression=False,
12 |     ),
13 |     checkpointer=dict(period=5000, max_to_keep=100),  # options for PeriodicCheckpointer
14 |     eval_period=5000,
15 |     log_period=20,
16 |     device="cuda"
17 |     # ...
18 | )
19 | 


--------------------------------------------------------------------------------
/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py:
--------------------------------------------------------------------------------
 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import (
 2 |     dataloader,
 3 |     lr_multiplier,
 4 |     model,
 5 |     optimizer,
 6 |     train,
 7 | )
 8 | 
 9 | model.backbone.bottom_up.stages.depth = 101
10 | 


--------------------------------------------------------------------------------
/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py:
--------------------------------------------------------------------------------
 1 | from .mask_rcnn_R_101_FPN_100ep_LSJ import (
 2 |     dataloader,
 3 |     lr_multiplier,
 4 |     model,
 5 |     optimizer,
 6 |     train,
 7 | )
 8 | 
 9 | train.max_iter *= 2  # 100ep -> 200ep
10 | 
11 | lr_multiplier.scheduler.milestones = [
12 |     milestone * 2 for milestone in lr_multiplier.scheduler.milestones
13 | ]
14 | lr_multiplier.scheduler.num_updates = train.max_iter
15 | 


--------------------------------------------------------------------------------
/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py:
--------------------------------------------------------------------------------
 1 | from .mask_rcnn_R_101_FPN_100ep_LSJ import (
 2 |     dataloader,
 3 |     lr_multiplier,
 4 |     model,
 5 |     optimizer,
 6 |     train,
 7 | )
 8 | 
 9 | train.max_iter *= 4  # 100ep -> 400ep
10 | 
11 | lr_multiplier.scheduler.milestones = [
12 |     milestone * 4 for milestone in lr_multiplier.scheduler.milestones
13 | ]
14 | lr_multiplier.scheduler.num_updates = train.max_iter
15 | 


--------------------------------------------------------------------------------
/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py:
--------------------------------------------------------------------------------
 1 | import detectron2.data.transforms as T
 2 | from detectron2.config.lazy import LazyCall as L
 3 | from detectron2.layers.batch_norm import NaiveSyncBatchNorm
 4 | from detectron2.solver import WarmupParamScheduler
 5 | from fvcore.common.param_scheduler import MultiStepParamScheduler
 6 | 
 7 | from ..common.data.coco import dataloader
 8 | from ..common.models.mask_rcnn_fpn import model
 9 | from ..common.optim import SGD as optimizer
10 | from ..common.train import train
11 | 
12 | # train from scratch
13 | train.init_checkpoint = ""
14 | train.amp.enabled = True
15 | train.ddp.fp16_compression = True
16 | model.backbone.bottom_up.freeze_at = 0
17 | 
18 | # SyncBN
19 | # fmt: off
20 | model.backbone.bottom_up.stem.norm = \
21 |     model.backbone.bottom_up.stages.norm = \
22 |     model.backbone.norm = "SyncBN"
23 | 
24 | # Using NaiveSyncBatchNorm because heads may have empty input. That is not supported by
25 | # torch.nn.SyncBatchNorm. We can remove this after
26 | # https://github.com/pytorch/pytorch/issues/36530 is fixed.
27 | model.roi_heads.box_head.conv_norm = \
28 |     model.roi_heads.mask_head.conv_norm = lambda c: NaiveSyncBatchNorm(c,
29 |                                                                        stats_mode="N")
30 | # fmt: on
31 | 
32 | # 2conv in RPN:
33 | # https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/modeling/architecture/heads.py#L95-L97  # noqa: E501, B950
34 | model.proposal_generator.head.conv_dims = [-1, -1]
35 | 
36 | # 4conv1fc box head
37 | model.roi_heads.box_head.conv_dims = [256, 256, 256, 256]
38 | model.roi_heads.box_head.fc_dims = [1024]
39 | 
40 | # resize_and_crop_image in:
41 | # https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/utils/input_utils.py#L127  # noqa: E501, B950
42 | image_size = 1024
43 | dataloader.train.mapper.augmentations = [
44 |     L(T.ResizeScale)(
45 |         min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size
46 |     ),
47 |     L(T.FixedSizeCrop)(crop_size=(image_size, image_size)),
48 |     L(T.RandomFlip)(horizontal=True),
49 | ]
50 | 
51 | # recompute boxes due to cropping
52 | dataloader.train.mapper.recompute_boxes = True
53 | 
54 | # larger batch-size.
55 | dataloader.train.total_batch_size = 64
56 | 
57 | # Equivalent to 100 epochs.
58 | # 100 ep = 184375 iters * 64 images/iter / 118000 images/ep
59 | train.max_iter = 184375
60 | 
61 | lr_multiplier = L(WarmupParamScheduler)(
62 |     scheduler=L(MultiStepParamScheduler)(
63 |         values=[1.0, 0.1, 0.01],
64 |         milestones=[163889, 177546],
65 |         num_updates=train.max_iter,
66 |     ),
67 |     warmup_length=500 / train.max_iter,
68 |     warmup_factor=0.067,
69 | )
70 | 
71 | optimizer.lr = 0.1
72 | optimizer.weight_decay = 4e-5
73 | 


--------------------------------------------------------------------------------
/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py:
--------------------------------------------------------------------------------
 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import (
 2 |     dataloader,
 3 |     lr_multiplier,
 4 |     model,
 5 |     optimizer,
 6 |     train,
 7 | )
 8 | 
 9 | train.max_iter *= 2  # 100ep -> 200ep
10 | 
11 | lr_multiplier.scheduler.milestones = [
12 |     milestone * 2 for milestone in lr_multiplier.scheduler.milestones
13 | ]
14 | lr_multiplier.scheduler.num_updates = train.max_iter
15 | 


--------------------------------------------------------------------------------
/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py:
--------------------------------------------------------------------------------
 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import (
 2 |     dataloader,
 3 |     lr_multiplier,
 4 |     model,
 5 |     optimizer,
 6 |     train,
 7 | )
 8 | 
 9 | train.max_iter *= 4  # 100ep -> 400ep
10 | 
11 | lr_multiplier.scheduler.milestones = [
12 |     milestone * 4 for milestone in lr_multiplier.scheduler.milestones
13 | ]
14 | lr_multiplier.scheduler.num_updates = train.max_iter
15 | 


--------------------------------------------------------------------------------
/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py:
--------------------------------------------------------------------------------
 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import (
 2 |     dataloader,
 3 |     lr_multiplier,
 4 |     model,
 5 |     optimizer,
 6 |     train,
 7 | )
 8 | 
 9 | train.max_iter //= 2  # 100ep -> 50ep
10 | 
11 | lr_multiplier.scheduler.milestones = [
12 |     milestone // 2 for milestone in lr_multiplier.scheduler.milestones
13 | ]
14 | lr_multiplier.scheduler.num_updates = train.max_iter
15 | 


--------------------------------------------------------------------------------
/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py:
--------------------------------------------------------------------------------
 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import (
 2 |     dataloader,
 3 |     lr_multiplier,
 4 |     model,
 5 |     optimizer,
 6 |     train,
 7 | )
 8 | from detectron2.config import LazyCall as L
 9 | from detectron2.modeling.backbone import RegNet
10 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
11 | 
12 | # Config source:
13 | # https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py  # noqa
14 | model.backbone.bottom_up = L(RegNet)(
15 |     stem_class=SimpleStem,
16 |     stem_width=32,
17 |     block_class=ResBottleneckBlock,
18 |     depth=23,
19 |     w_a=38.65,
20 |     w_0=96,
21 |     w_m=2.43,
22 |     group_width=40,
23 |     norm="SyncBN",
24 |     out_features=["s1", "s2", "s3", "s4"],
25 | )
26 | model.pixel_std = [57.375, 57.120, 58.395]
27 | 
28 | # RegNets benefit from enabling cudnn benchmark mode
29 | train.cudnn_benchmark = True
30 | 


--------------------------------------------------------------------------------
/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py:
--------------------------------------------------------------------------------
 1 | from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import (
 2 |     dataloader,
 3 |     lr_multiplier,
 4 |     model,
 5 |     optimizer,
 6 |     train,
 7 | )
 8 | 
 9 | train.max_iter *= 2  # 100ep -> 200ep
10 | 
11 | lr_multiplier.scheduler.milestones = [
12 |     milestone * 2 for milestone in lr_multiplier.scheduler.milestones
13 | ]
14 | lr_multiplier.scheduler.num_updates = train.max_iter
15 | 


--------------------------------------------------------------------------------
/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py:
--------------------------------------------------------------------------------
 1 | from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import (
 2 |     dataloader,
 3 |     lr_multiplier,
 4 |     model,
 5 |     optimizer,
 6 |     train,
 7 | )
 8 | 
 9 | train.max_iter *= 4  # 100ep -> 400ep
10 | 
11 | lr_multiplier.scheduler.milestones = [
12 |     milestone * 4 for milestone in lr_multiplier.scheduler.milestones
13 | ]
14 | lr_multiplier.scheduler.num_updates = train.max_iter
15 | 


--------------------------------------------------------------------------------
/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py:
--------------------------------------------------------------------------------
 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import (
 2 |     dataloader,
 3 |     lr_multiplier,
 4 |     model,
 5 |     optimizer,
 6 |     train,
 7 | )
 8 | from detectron2.config import LazyCall as L
 9 | from detectron2.modeling.backbone import RegNet
10 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
11 | 
12 | # Config source:
13 | # https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py  # noqa
14 | model.backbone.bottom_up = L(RegNet)(
15 |     stem_class=SimpleStem,
16 |     stem_width=32,
17 |     block_class=ResBottleneckBlock,
18 |     depth=22,
19 |     w_a=31.41,
20 |     w_0=96,
21 |     w_m=2.24,
22 |     group_width=64,
23 |     se_ratio=0.25,
24 |     norm="SyncBN",
25 |     out_features=["s1", "s2", "s3", "s4"],
26 | )
27 | model.pixel_std = [57.375, 57.120, 58.395]
28 | 
29 | # RegNets benefit from enabling cudnn benchmark mode
30 | train.cudnn_benchmark = True
31 | 


--------------------------------------------------------------------------------
/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py:
--------------------------------------------------------------------------------
 1 | from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import (
 2 |     dataloader,
 3 |     lr_multiplier,
 4 |     model,
 5 |     optimizer,
 6 |     train,
 7 | )
 8 | 
 9 | train.max_iter *= 2  # 100ep -> 200ep
10 | 
11 | lr_multiplier.scheduler.milestones = [
12 |     milestone * 2 for milestone in lr_multiplier.scheduler.milestones
13 | ]
14 | lr_multiplier.scheduler.num_updates = train.max_iter
15 | 


--------------------------------------------------------------------------------
/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py:
--------------------------------------------------------------------------------
 1 | from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import (
 2 |     dataloader,
 3 |     lr_multiplier,
 4 |     model,
 5 |     optimizer,
 6 |     train,
 7 | )
 8 | 
 9 | train.max_iter *= 4  # 100ep -> 400ep
10 | 
11 | lr_multiplier.scheduler.milestones = [
12 |     milestone * 4 for milestone in lr_multiplier.scheduler.milestones
13 | ]
14 | lr_multiplier.scheduler.num_updates = train.max_iter
15 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/README.md:
--------------------------------------------------------------------------------
1 | These are quick configs for performance or accuracy regression tracking purposes.
2 | 
3 | * `*instant_test.yaml`: can train on 2 GPUs. They are used to test whether the training can
4 |   successfully finish. They are not expected to produce reasonable training results.
5 | * `*inference_acc_test.yaml`: They should be run using `--eval-only`. They run inference using pre-trained models and verify
6 |   the results are as expected.
7 | * `*training_acc_test.yaml`: They should be trained on 8 GPUs. They finish in about an hour and verify the training accuracy
8 |   is within the normal range.
9 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP",  43.87, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"
 2 | DATASETS:
 3 |   TRAIN: ("coco_2017_val_100",)
 4 |   TEST: ("coco_2017_val_100",)
 5 | SOLVER:
 6 |   BASE_LR: 0.005
 7 |   STEPS: (30,)
 8 |   MAX_ITER: 40
 9 |   IMS_PER_BATCH: 4
10 | DATALOADER:
11 |   NUM_WORKERS: 2
12 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 | DATASETS:
 5 |   TRAIN: ("coco_2017_val_100",)
 6 |   PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
 7 |   TEST: ("coco_2017_val_100",)
 8 |   PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
 9 | SOLVER:
10 |   BASE_LR: 0.005
11 |   STEPS: (30,)
12 |   MAX_ITER: 40
13 |   IMS_PER_BATCH: 4
14 | DATALOADER:
15 |   NUM_WORKERS: 2
16 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl"
4 | DATASETS:
5 |   TEST: ("keypoints_coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   KEYPOINT_ON: True
 5 |   ROI_HEADS:
 6 |     NUM_CLASSES: 1
 7 | DATASETS:
 8 |   TRAIN: ("keypoints_coco_2017_val_100",)
 9 |   TEST: ("keypoints_coco_2017_val_100",)
10 | SOLVER:
11 |   BASE_LR: 0.005
12 |   STEPS: (30,)
13 |   MAX_ITER: 40
14 |   IMS_PER_BATCH: 4
15 | DATALOADER:
16 |   NUM_WORKERS: 2
17 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   KEYPOINT_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     BATCH_SIZE_PER_IMAGE: 256
 9 |     NUM_CLASSES: 1
10 |   ROI_KEYPOINT_HEAD:
11 |     POOLER_RESOLUTION: 14
12 |     POOLER_SAMPLING_RATIO: 2
13 |     NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False
14 |     LOSS_WEIGHT: 4.0
15 |   ROI_BOX_HEAD:
16 |     SMOOTH_L1_BETA: 1.0  # Keypoint AP degrades when using plain L1 loss
17 |   RPN:
18 |     SMOOTH_L1_BETA: 0.2  # Keypoint AP degrades when using plain L1 loss
19 | DATASETS:
20 |   TRAIN: ("keypoints_coco_2017_val",)
21 |   TEST: ("keypoints_coco_2017_val",)
22 | INPUT:
23 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
24 | SOLVER:
25 |   WARMUP_FACTOR: 0.33333333
26 |   WARMUP_ITERS: 100
27 |   STEPS: (5500, 5800)
28 |   MAX_ITER: 6000
29 | TEST:
30 |   EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]]
31 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   KEYPOINT_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     BATCH_SIZE_PER_IMAGE: 256
 9 |     NUM_CLASSES: 1
10 |   ROI_KEYPOINT_HEAD:
11 |     POOLER_RESOLUTION: 14
12 |     POOLER_SAMPLING_RATIO: 2
13 |   ROI_BOX_HEAD:
14 |     SMOOTH_L1_BETA: 1.0  # Keypoint AP degrades when using plain L1 loss
15 |   RPN:
16 |     SMOOTH_L1_BETA: 0.2  # Keypoint AP degrades when using plain L1 loss
17 | DATASETS:
18 |   TRAIN: ("keypoints_coco_2017_val",)
19 |   TEST: ("keypoints_coco_2017_val",)
20 | INPUT:
21 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
22 | SOLVER:
23 |   WARMUP_FACTOR: 0.33333333
24 |   WARMUP_ITERS: 100
25 |   STEPS: (5500, 5800)
26 |   MAX_ITER: 6000
27 | TEST:
28 |   EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]]
29 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 | DATASETS:
 6 |   TRAIN: ("coco_2017_val_100",)
 7 |   TEST: ("coco_2017_val_100",)
 8 | SOLVER:
 9 |   BASE_LR: 0.001
10 |   STEPS: (30,)
11 |   MAX_ITER: 40
12 |   IMS_PER_BATCH: 4
13 |   CLIP_GRADIENTS:
14 |     ENABLED: True
15 |     CLIP_TYPE: "value"
16 |     CLIP_VALUE: 1.0
17 | DATALOADER:
18 |   NUM_WORKERS: 2
19 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 | DATASETS:
 6 |   TRAIN: ("coco_2017_val_100",)
 7 |   TEST: ("coco_2017_val_100",)
 8 | SOLVER:
 9 |   BASE_LR: 0.001
10 |   STEPS: (30,)
11 |   MAX_ITER: 40
12 |   IMS_PER_BATCH: 4
13 | DATALOADER:
14 |   NUM_WORKERS: 2
15 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   ROI_HEADS:
 5 |     BATCH_SIZE_PER_IMAGE: 256
 6 |   MASK_ON: True
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_val",)
 9 |   TEST: ("coco_2017_val",)
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (600,)
12 |   MAX_SIZE_TRAIN: 1000
13 |   MIN_SIZE_TEST: 800
14 |   MAX_SIZE_TEST: 1000
15 | SOLVER:
16 |   IMS_PER_BATCH: 8  # base uses 16
17 |   WARMUP_FACTOR: 0.33333
18 |   WARMUP_ITERS: 100
19 |   STEPS: (11000, 11600)
20 |   MAX_ITER: 12000
21 | TEST:
22 |   EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]]
23 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
 4 | DATASETS:
 5 |   TEST: ("coco_2017_val_100",)
 6 | TEST:
 7 |   EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP",  42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]]
 8 |   AUG:
 9 |     ENABLED: True
10 |     MIN_SIZES: (700, 800)  # to save some time
11 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 | DATASETS:
 6 |   TRAIN: ("coco_2017_val_100",)
 7 |   TEST: ("coco_2017_val_100",)
 8 | SOLVER:
 9 |   BASE_LR: 0.005
10 |   STEPS: (30,)
11 |   MAX_ITER: 40
12 |   IMS_PER_BATCH: 4
13 | DATALOADER:
14 |   NUM_WORKERS: 2
15 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml"
2 | MODEL:
3 |   ROI_BOX_HEAD:
4 |     TRAIN_ON_PRED_BOXES: True
5 | TEST:
6 |   EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]]
7 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   ROI_HEADS:
 5 |     BATCH_SIZE_PER_IMAGE: 256
 6 |   MASK_ON: True
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_val",)
 9 |   TEST: ("coco_2017_val",)
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (600,)
12 |   MAX_SIZE_TRAIN: 1000
13 |   MIN_SIZE_TEST: 800
14 |   MAX_SIZE_TEST: 1000
15 | SOLVER:
16 |   WARMUP_FACTOR: 0.3333333
17 |   WARMUP_ITERS: 100
18 |   STEPS: (5500, 5800)
19 |   MAX_ITER: 6000
20 | TEST:
21 |   EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]]
22 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100_panoptic_separated",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "PanopticFPN"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   SEM_SEG_HEAD:
 9 |     LOSS_WEIGHT: 0.5
10 | DATASETS:
11 |   TRAIN: ("coco_2017_val_100_panoptic_separated",)
12 |   TEST: ("coco_2017_val_100_panoptic_separated",)
13 | SOLVER:
14 |   BASE_LR: 0.005
15 |   STEPS: (30,)
16 |   MAX_ITER: 40
17 |   IMS_PER_BATCH: 4
18 | DATALOADER:
19 |   NUM_WORKERS: 1
20 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "PanopticFPN"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   SEM_SEG_HEAD:
 9 |     LOSS_WEIGHT: 0.5
10 | DATASETS:
11 |   TRAIN: ("coco_2017_val_panoptic_separated",)
12 |   TEST: ("coco_2017_val_panoptic_separated",)
13 | SOLVER:
14 |   BASE_LR: 0.01
15 |   WARMUP_FACTOR: 0.001
16 |   WARMUP_ITERS: 500
17 |   STEPS: (5500,)
18 |   MAX_ITER: 7000
19 | TEST:
20 |   EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 0.8]]
21 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 | DATASETS:
 5 |   TRAIN: ("coco_2017_val_100",)
 6 |   TEST: ("coco_2017_val_100",)
 7 | SOLVER:
 8 |   BASE_LR: 0.005
 9 |   STEPS: (30,)
10 |   MAX_ITER: 40
11 |   IMS_PER_BATCH: 4
12 | DATALOADER:
13 |   NUM_WORKERS: 2
14 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 | DATASETS:
 5 |   TRAIN: ("coco_2017_val_100",)
 6 |   TEST: ("coco_2017_val_100",)
 7 | SOLVER:
 8 |   STEPS: (30,)
 9 |   MAX_ITER: 40
10 |   BASE_LR: 0.005
11 |   IMS_PER_BATCH: 4
12 | DATALOADER:
13 |   NUM_WORKERS: 2
14 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SemanticSegmentor"
 4 |   WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl"
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | DATASETS:
 8 |   TEST: ("coco_2017_val_100_panoptic_stuffonly",)
 9 | TEST:
10 |   EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]]
11 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SemanticSegmentor"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_val_100_panoptic_stuffonly",)
 9 |   TEST: ("coco_2017_val_100_panoptic_stuffonly",)
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | SOLVER:
13 |   BASE_LR: 0.005
14 |   STEPS: (30,)
15 |   MAX_ITER: 40
16 |   IMS_PER_BATCH: 4
17 | DATALOADER:
18 |   NUM_WORKERS: 2
19 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SemanticSegmentor"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_val_panoptic_stuffonly",)
 9 |   TEST: ("coco_2017_val_panoptic_stuffonly",)
10 | SOLVER:
11 |   BASE_LR: 0.01
12 |   WARMUP_FACTOR: 0.001
13 |   WARMUP_ITERS: 300
14 |   STEPS: (5500,)
15 |   MAX_ITER: 7000
16 | TEST:
17 |   EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]]
18 | INPUT:
19 |   # no scale augmentation
20 |   MIN_SIZE_TRAIN: (800, )
21 | 


--------------------------------------------------------------------------------
/export_model.py:
--------------------------------------------------------------------------------
  1 | """
  2 | The script was taken from Detectron2 repo:
  3 | https://github.com/facebookresearch/detectron2/blob/v0.6/tools/deploy/export_model.py
  4 | 
  5 | We might need to update the script if it's no longer compatible with the later releases of detectron2
  6 | """
  7 | 
  8 | # Copyright (c) Facebook, Inc. and its affiliates.
  9 | import argparse
 10 | import os
 11 | from typing import Dict, List, Tuple
 12 | import torch
 13 | from torch import Tensor, nn
 14 | 
 15 | import detectron2.data.transforms as T
 16 | from detectron2.checkpoint import DetectionCheckpointer
 17 | from detectron2.config import get_cfg
 18 | from detectron2.data import build_detection_test_loader, detection_utils
 19 | from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format
 20 | from detectron2.export import (
 21 |     Caffe2Tracer,
 22 |     TracingAdapter,
 23 |     add_export_config,
 24 |     dump_torchscript_IR,
 25 |     scripting_with_instances,
 26 | )
 27 | from detectron2.modeling import GeneralizedRCNN, RetinaNet, build_model
 28 | from detectron2.modeling.postprocessing import detector_postprocess
 29 | from detectron2.projects.point_rend import add_pointrend_config
 30 | from detectron2.structures import Boxes
 31 | from detectron2.utils.env import TORCH_VERSION
 32 | from detectron2.utils.file_io import PathManager
 33 | from detectron2.utils.logger import setup_logger
 34 | import torch
 35 | from unittest.mock import patch
 36 | from functools import wraps
 37 | 
 38 | 
 39 | def patch_torch_stack(func):
 40 |     orig_stack = torch.stack
 41 | 
 42 |     def new_stack(*args, **kwargs):
 43 |         return orig_stack(*args, **kwargs).to('cuda')
 44 | 
 45 |     @wraps(func)
 46 |     def wrapper(*args, **kwargs):
 47 |         with patch("torch.stack", side_effect=new_stack, wraps=orig_stack):
 48 |             return func(*args, **kwargs)
 49 |     return wrapper
 50 | 
 51 | 
 52 | def setup_cfg(args):
 53 |     cfg = get_cfg()
 54 |     # cuda context is initialized before creating dataloader, so we don't fork anymore
 55 |     cfg.DATALOADER.NUM_WORKERS = 0
 56 |     cfg = add_export_config(cfg)
 57 |     add_pointrend_config(cfg)
 58 |     cfg.merge_from_file(args.config_file)
 59 |     cfg.merge_from_list(args.opts)
 60 |     cfg.freeze()
 61 |     return cfg
 62 | 
 63 | 
 64 | def export_caffe2_tracing(cfg, torch_model, inputs):
 65 |     tracer = Caffe2Tracer(cfg, torch_model, inputs)
 66 |     if args.format == "caffe2":
 67 |         caffe2_model = tracer.export_caffe2()
 68 |         caffe2_model.save_protobuf(args.output)
 69 |         # draw the caffe2 graph
 70 |         caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=inputs)
 71 |         return caffe2_model
 72 |     elif args.format == "onnx":
 73 |         import onnx
 74 | 
 75 |         onnx_model = tracer.export_onnx()
 76 |         onnx.save(onnx_model, os.path.join(args.output, "model.onnx"))
 77 |     elif args.format == "torchscript":
 78 |         ts_model = tracer.export_torchscript()
 79 |         with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f:
 80 |             torch.jit.save(ts_model, f)
 81 |         dump_torchscript_IR(ts_model, args.output)
 82 | 
 83 | 
 84 | # experimental. API not yet final
@patch_torch_stack
def export_scripting(torch_model):
    """Export *torch_model* to TorchScript via scripting.

    Wraps the model in an adapter whose ``forward`` returns plain dicts, scripts
    it with ``scripting_with_instances``, writes ``model.ts`` into
    ``args.output`` and dumps the TorchScript IR there as well. Reads the
    module-level ``args`` for the format and the output directory.

    Args:
        torch_model: the model to script.

    Returns:
        Always ``None``.

    Raises:
        AssertionError: if ``TORCH_VERSION`` is below (1, 8) or ``args.format``
            is not ``"torchscript"``.
    """
    assert TORCH_VERSION >= (1, 8)
    # Field name -> type mapping handed to scripting_with_instances below.
    fields = {
        "proposal_boxes": Boxes,
        "objectness_logits": Tensor,
        "pred_boxes": Boxes,
        "scores": Tensor,
        "pred_classes": Tensor,
        "pred_masks": Tensor,
        "pred_keypoints": torch.Tensor,
        "pred_keypoint_heatmaps": torch.Tensor,
    }
    assert args.format == "torchscript", "Scripting only supports torchscript format."

    class ScriptableAdapterBase(nn.Module):
        # Use this adapter to work around https://github.com/pytorch/pytorch/issues/46944
        # by returning dicts instead of instances. Otherwise the exported model is not deployable.
        def __init__(self):
            super().__init__()
            self.model = torch_model
            self.eval()

    if isinstance(torch_model, GeneralizedRCNN):

        class ScriptableAdapter(ScriptableAdapterBase):
            # Calls the model's inference() with do_postprocess=False and returns
            # the fields of each result as a dict.
            def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]:
                instances = self.model.inference(inputs, do_postprocess=False)
                return [i.get_fields() for i in instances]

    else:

        class ScriptableAdapter(ScriptableAdapterBase):
            # Calls the model directly and returns the fields of each result as a dict.
            def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]:
                instances = self.model(inputs)
                return [i.get_fields() for i in instances]

    ts_model = scripting_with_instances(ScriptableAdapter(), fields)
    with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f:
        torch.jit.save(ts_model, f)
    dump_torchscript_IR(ts_model, args.output)
    # TODO: Python inference is still missing the postprocessing glue code,
    # so no callable model is returned for evaluation.
    return None
128 | 
129 | 
130 | # experimental. API not yet final
@patch_torch_stack
def export_tracing(torch_model, inputs):
    """Export *torch_model* by tracing it on the first sample image.

    Depending on ``args.format`` the traced model is written to
    ``model.ts`` (plus an IR dump) or ``model.onnx`` inside ``args.output``.
    The adapter's input and output schemas are logged afterwards. Reads the
    module-level ``args`` and ``logger``.

    Args:
        torch_model: the model to trace.
        inputs: sample batch; only ``inputs[0]["image"]`` is used for tracing.

    Returns:
        A callable suitable for evaluation when the format is ``"torchscript"``
        and the model is a ``GeneralizedRCNN`` or ``RetinaNet``; ``None``
        otherwise.
    """
    assert TORCH_VERSION >= (1, 8)
    image = inputs[0]["image"]
    trace_inputs = [{"image": image}]  # remove other unused keys

    def rcnn_inference(model, batch):
        # use do_postprocess=False so it returns ROI mask
        first = model.inference(batch, do_postprocess=False)[0]
        return [{"instances": first}]

    # For non-RCNN models we assume that the model is simply called directly.
    inference = rcnn_inference if isinstance(torch_model, GeneralizedRCNN) else None
    traceable_model = TracingAdapter(torch_model, trace_inputs, inference)

    is_torchscript = args.format == "torchscript"
    if is_torchscript:
        ts_model = torch.jit.trace(traceable_model, (image,))
        with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as out_file:
            torch.jit.save(ts_model, out_file)
        dump_torchscript_IR(ts_model, args.output)
    elif args.format == "onnx":
        with PathManager.open(os.path.join(args.output, "model.onnx"), "wb") as out_file:
            torch.onnx.export(traceable_model, (image,), out_file, opset_version=11)

    logger.info("Inputs schema: " + str(traceable_model.inputs_schema))
    logger.info("Outputs schema: " + str(traceable_model.outputs_schema))

    # Python-side evaluation is only wired up for torchscript RCNN/RetinaNet exports.
    if not is_torchscript or not isinstance(torch_model, (GeneralizedRCNN, RetinaNet)):
        return None

    def eval_wrapper(inputs):
        """
        The exported model does not contain the final resize step, which is typically
        unused in deployment but needed for evaluation. We add it manually here.
        """
        sample = inputs[0]
        raw_outputs = ts_model(sample["image"])
        instances = traceable_model.outputs_schema(raw_outputs)[0]["instances"]
        resized = detector_postprocess(instances, sample["height"], sample["width"])
        return [{"instances": resized}]

    return eval_wrapper
176 | 
177 | 
def get_sample_inputs(args):
    """Return one batch of sample inputs for export.

    With ``args.sample_image`` set, the image is read in ``cfg.INPUT.FORMAT``,
    resized with ``ResizeShortestEdge`` using the test-time sizes from the
    module-level ``cfg`` and converted to a float32 CHW tensor. Without it,
    the first batch of the first test dataset is used.

    Args:
        args: parsed command-line namespace providing ``sample_image``.

    Returns:
        A list holding a single input dict (sample image case) or the first
        batch produced by the test data loader.
    """
    if args.sample_image is not None:
        raw_image = detection_utils.read_image(args.sample_image, format=cfg.INPUT.FORMAT)
        # Do same preprocessing as DefaultPredictor
        resize = T.ResizeShortestEdge(
            [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
        )
        height, width = raw_image.shape[:2]
        resized = resize.get_transform(raw_image).apply_image(raw_image)
        tensor = torch.as_tensor(resized.astype("float32").transpose(2, 0, 1))
        return [{"image": tensor, "height": height, "width": width}]

    # No sample image given: take the first batch from the dataset.
    loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
    return next(iter(loader))
201 | 
202 | 
if __name__ == "__main__":
    # ---- command line -----------------------------------------------------
    parser = argparse.ArgumentParser(description="Export a model for deployment.")
    parser.add_argument(
        "--format",
        default="caffe2",
        choices=["caffe2", "onnx", "torchscript"],
        help="output format",
    )
    parser.add_argument(
        "--export-method",
        default="caffe2_tracing",
        choices=["caffe2_tracing", "tracing", "scripting"],
        help="Method to export models",
    )
    parser.add_argument("--config-file", metavar="FILE", default="", help="path to config file")
    parser.add_argument("--sample-image", type=str, default=None, help="sample image for input")
    parser.add_argument("--run-eval", action="store_true")
    parser.add_argument("--output", help="output directory for the converted model")
    parser.add_argument(
        "opts",
        nargs=argparse.REMAINDER,
        default=None,
        help="Modify config options using the command-line",
    )
    # NOTE: args, logger and cfg are read as module globals by the export helpers.
    args = parser.parse_args()

    logger = setup_logger()
    logger.info("Command line arguments: " + str(args))
    PathManager.mkdirs(args.output)
    # Disable respecialization on new shapes. Otherwise --run-eval will be slow
    torch._C._jit_set_bailout_depth(1)

    cfg = setup_cfg(args)

    # ---- model and sample data --------------------------------------------
    torch_model = build_model(cfg)
    DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS)
    torch_model.eval()

    sample_inputs = get_sample_inputs(args)

    # ---- convert and save -------------------------------------------------
    # argparse restricts --export-method to exactly these keys.
    exporters = {
        "caffe2_tracing": lambda: export_caffe2_tracing(cfg, torch_model, sample_inputs),
        "scripting": lambda: export_scripting(torch_model),
        "tracing": lambda: export_tracing(torch_model, sample_inputs),
    }
    exported_model = exporters[args.export_method]()

    # ---- optional evaluation of the converted model -----------------------
    if args.run_eval:
        assert exported_model is not None, (
            "Python inference is not yet implemented for "
            f"export_method={args.export_method}, format={args.format}."
        )
        logger.info("Running evaluation ... this takes a long time if you export to CPU.")
        eval_dataset = cfg.DATASETS.TEST[0]
        eval_loader = build_detection_test_loader(cfg, eval_dataset)
        # NOTE: hard-coded evaluator. change to the evaluator for your dataset
        coco_evaluator = COCOEvaluator(eval_dataset, output_dir=args.output)
        print_csv_format(inference_on_dataset(exported_model, eval_loader, coco_evaluator))
265 | 


--------------------------------------------------------------------------------
/models/infer_pipeline/config.pbtxt:
--------------------------------------------------------------------------------
  1 | platform: "ensemble"
  2 | max_batch_size: 0
  3 | 
  4 | input [
  5 |   {
  6 |     name: "IMAGE_BYTES"
  7 |     data_type: TYPE_STRING
  8 |     dims: [ 1 ]
  9 |   }
 10 | ]
 11 | output [
 12 |   {
 13 |     name: "BBOXES"
 14 |     data_type: TYPE_FP32
 15 |     dims: [-1,4]
 16 |   },
 17 |   {
 18 |     name: "CLASSES"
 19 |     data_type: TYPE_INT64
 20 |     dims: [-1]
 21 |   },
 22 |   {
 23 |     name: "MASKS"
 24 |     data_type: TYPE_UINT8
 25 |     dims: [-1, -1, -1]
 26 |   },
 27 |   {
 28 |     name: "SCORES"
 29 |     data_type: TYPE_FP32
 30 |     dims: [-1]
 31 |   }
 32 | ]
 33 | 
 34 | ensemble_scheduling {
 35 |   step [
 36 |     {
 37 |       model_name: "preprocess"
 38 |       model_version: 1
 39 |       input_map: {
 40 |         key: "image_bytes"
 41 |         value: "IMAGE_BYTES"
 42 |       }
 43 |       output_map: {
 44 |         key: "preprocessed_image"
 45 |         value: "PREPROCESSED_IMAGE"
 46 |       }
 47 |     },
 48 |     {
 49 |       model_name: "mask_rcnn"
 50 |       model_version: 1
 51 |       input_map: {
 52 |         key: "image__0"
 53 |         value: "PREPROCESSED_IMAGE"
 54 |       }
 55 |       output_map: {
 56 |         key: "bboxes__0"
 57 |         value: "RAW_BBOXES"
 58 |       }
 59 |       output_map: {
 60 |         key: "classes__1"
 61 |         value: "RAW_CLASSES"
 62 |       }
 63 |       output_map: {
 64 |         key: "masks__2"
 65 |         value: "RAW_MASKS"
 66 |       }
 67 |       output_map: {
 68 |         key: "scores__3"
 69 |         value: "RAW_SCORES"
 70 |       }
 71 |       output_map: {
 72 |         key: "shape__4"
 73 |         value: "SHAPE"
 74 |       }
 75 |     },
 76 |     {
 77 |       model_name: "postprocess"
 78 |       model_version: 1
 79 |       input_map: {
 80 |         key: "bboxes"
 81 |         value: "RAW_BBOXES"
 82 |       }
 83 |       input_map: {
 84 |         key: "classes"
 85 |         value: "RAW_CLASSES"
 86 |       }
 87 |       input_map: {
 88 |         key: "masks"
 89 |         value: "RAW_MASKS"
 90 |       }
 91 |       input_map: {
 92 |         key: "scores"
 93 |         value: "RAW_SCORES"
 94 |       }
 95 |       input_map: {
 96 |        key: "shape"
 97 |        value: "SHAPE"
 98 |       }
 99 |       output_map: {
100 |         key: "post_bboxes"
101 |         value: "BBOXES"
102 |       }
103 |       output_map: {
104 |         key: "post_classes"
105 |         value: "CLASSES"
106 |       }
107 |       output_map: {
108 |         key: "post_masks"
109 |         value: "MASKS"
110 |       }
111 |       output_map: {
112 |         key: "post_scores"
113 |         value: "SCORES"
114 |       }
115 |     }
116 |   ]
117 | }
118 | 


--------------------------------------------------------------------------------
/models/mask_rcnn/config.pbtxt:
--------------------------------------------------------------------------------
 1 | platform: "pytorch_libtorch"
 2 | default_model_filename: "model.ts"
 3 | max_batch_size: 0
 4 | input [
 5 |   {
 6 |     name: "image__0"
 7 |     data_type: TYPE_UINT8
 8 |     dims: [3, -1, -1]
 9 |   }
10 | ]
11 | output [
12 |   {
13 |     name: "bboxes__0"
14 |     data_type: TYPE_FP32
15 |     dims: [-1,4]
16 |   },
17 |   {
18 |     name: "classes__1"
19 |     data_type: TYPE_INT64
20 |     dims: [-1]
21 |   },
22 |   {
23 |     name: "masks__2"
24 |     data_type: TYPE_FP32
25 |     dims: [-1, 1, -1, -1]
26 |   },
27 |   {
28 |     name: "scores__3"
29 |     data_type: TYPE_FP32
30 |     dims: [-1]
31 |   },
32 |   {
33 |     name: "shape__4"
34 |     data_type: TYPE_INT64
35 |     dims: [2]
36 |   }
37 | ]
38 | 


--------------------------------------------------------------------------------
/models/postprocess/config.pbtxt:
--------------------------------------------------------------------------------
 1 | backend: "python"
 2 | 
 3 | input [
 4 |   {
 5 |     name: "bboxes"
 6 |     data_type: TYPE_FP32
 7 |     dims: [-1,4]
 8 |   },
 9 |   {
10 |     name: "classes"
11 |     data_type: TYPE_INT64
12 |     dims: [-1]
13 |   },
14 |   {
15 |     name: "masks"
16 |     data_type: TYPE_FP32
17 |     dims: [-1, 1, -1, -1]
18 |   },
19 |   {
20 |     name: "scores"
21 |     data_type: TYPE_FP32
22 |     dims: [-1]
23 |   },
24 |   {
25 |     name: "shape"
26 |     data_type: TYPE_INT64
27 |     dims: [2]
28 |   }
29 | ]
30 | 
31 | output [
32 |   {
33 |     name: "post_bboxes"
34 |     data_type: TYPE_FP32
35 |     dims: [-1,4]
36 |   },
37 |   {
38 |     name: "post_classes"
39 |     data_type: TYPE_INT64
40 |     dims: [-1]
41 |   },
42 |   {
43 |     name: "post_masks"
44 |     data_type: TYPE_UINT8
45 |     dims: [-1, -1, -1]
46 |   },
47 |   {
48 |     name: "post_scores"
49 |     data_type: TYPE_FP32
50 |     dims: [-1]
51 |   }
52 | ]
53 | 
54 | parameters: {
55 |   key: "EXECUTION_ENV_PATH",
56 |   value: {string_value: "$$TRITON_MODEL_DIRECTORY/pyenv.tar.gz"}
57 | }
58 | 


--------------------------------------------------------------------------------
/models/preprocess/config.pbtxt:
--------------------------------------------------------------------------------
 1 | backend: "python"
 2 | 
 3 | input [
 4 |   {
 5 |     name: "image_bytes"
 6 |     data_type: TYPE_STRING
 7 |     dims: [1]
 8 |   }
 9 | ]
10 | 
11 | output [
12 |   {
13 |     name: "preprocessed_image"
14 |     data_type: TYPE_UINT8
15 |     dims: [3, -1, -1]
16 |   }
17 | ]
18 | 
19 | 
20 | parameters: {
21 |   key: "EXECUTION_ENV_PATH",
22 |   value: {string_value: "$$TRITON_MODEL_DIRECTORY/pyenv.tar.gz"}
23 | }
24 | 


--------------------------------------------------------------------------------
/python_models/postprocess.py:
--------------------------------------------------------------------------------
 1 | from PIL import Image
 2 | import numpy as np
 3 | import triton_python_backend_utils as pb_utils
 4 | 
 5 | 
 6 | class TritonPythonModel:
 7 |     def get_inputs(self, request):
 8 |         input_tensor_names = \
 9 |             ['bboxes', 'classes', 'masks', 'scores', 'shape']
10 |         inputs = {
11 |             tensor_name: pb_utils.get_input_tensor_by_name(request, tensor_name).as_numpy()
12 |                   for tensor_name in input_tensor_names
13 |         }
14 |         return inputs
15 | 
16 |     def paste_mask(self, mask, box, img_h, img_w, threshold):
17 |         """
18 |         Paste raw masks with fixed resolution from the mask head to an image
19 |         NOTE: You can find the better implementation from:
20 |         https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/mask_ops.py
21 | 
22 |         This method largely based on "paste_mask_in_image_old" from mask_ops.py.
23 |         I used it for the sake of simplicity.
24 | 
25 |         Args:
26 |             mask: M x M array where M is the Pooler resolution of your mask head
27 |             box: array of shape (4,)
28 |             img_h, img_w (int): Image height and width.
29 |             threshold (float): Mask binarization threshold in [0, 1].
30 |         Return:
31 |             im_mask (Tensor):
32 |                 The resized and binarized object mask pasted into the original
33 |                 image plane (a tensor of shape (img_h, img_w)).
34 |         """
35 |         box = box.astype(np.int)
36 |         # Resize the mask to the size of the bbox
37 |         samples_w = box[2] - box[0] + 1
38 |         samples_h = box[3] - box[1] + 1
39 |         mask = Image.fromarray(mask)
40 |         mask = mask.resize((samples_w, samples_h), resample=Image.BILINEAR)
41 |         mask = np.array(mask, copy=False)
42 |         mask = np.array(mask > threshold, dtype=np.uint8)
43 | 
44 |         im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
45 |         x_0 = max(box[0], 0)
46 |         x_1 = min(box[2] + 1, img_w)
47 |         y_0 = max(box[1], 0)
48 |         y_1 = min(box[3] + 1, img_h)
49 | 
50 |         im_mask[y_0:y_1, x_0:x_1] = mask[
51 |             (y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0])
52 |         ]
53 |         return im_mask
54 | 
55 |     def postprocess(self, predictions):
56 |         img_h, img_w = predictions.pop('shape')
57 |         # Filter out predictions with low confidence scores
58 |         scores = predictions['scores']
59 |         predictions = {name: tensor[scores > 0.5, ...] for name, tensor in predictions.items()}
60 |         # Paste masks to the full image
61 |         full_masks = [
62 |             self.paste_mask(mask[0, :, :], box, img_h, img_w, 0.5)
63 |             for mask, box in zip(predictions['masks'], predictions['bboxes'])
64 |         ]
65 |         predictions['masks'] = np.stack(full_masks, axis=0)
66 |         return predictions
67 | 
68 |     def execute(self, requests):
69 |         responses = []
70 |         for request in requests:
71 |             predictions = self.get_inputs(request)
72 |             predictions = self.postprocess(predictions)
73 |             # prepare outputs for the reponse
74 |             out_tensors = []
75 |             for name in ['bboxes', 'classes', 'scores', 'masks']:
76 |                 tensor = pb_utils.Tensor('post_' + name, predictions[name])
77 |                 out_tensors.append(tensor)
78 |             response = pb_utils.InferenceResponse(output_tensors=out_tensors)
79 |             responses.append(response)
80 |         return responses
81 | 


--------------------------------------------------------------------------------
/python_models/preprocess.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | import numpy as np
 3 | from PIL import Image
 4 | import triton_python_backend_utils as pb_utils
 5 | 
 6 | 
 7 | class TritonPythonModel:
 8 |     def execute(self, requests):
 9 |         responses = []
10 |         for request in requests:
11 |             image_bytes = \
12 |                 pb_utils.get_input_tensor_by_name(request, "image_bytes").as_numpy()[0]
13 |             pil_img = Image.open(io.BytesIO(image_bytes))
14 |             img = np.array(pil_img)
15 |             img = np.ascontiguousarray(img.transpose(2, 0, 1))
16 |             out_tensor = pb_utils.Tensor("preprocessed_image", img)
17 |             response = pb_utils.InferenceResponse(output_tensors=[out_tensor])
18 |             responses.append(response)
19 |         return responses
20 | 
21 | 


--------------------------------------------------------------------------------
/python_models/requirements.txt:
--------------------------------------------------------------------------------
1 | conda-pack==0.6.0
2 | numpy==1.23.1
3 | Pillow==9.2.0
4 | 


--------------------------------------------------------------------------------
/run_triton.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Start a Triton Inference Server container that serves the model repository.
#
# The repository defaults to the "models" directory next to this script, so
# the launcher works from any checkout location. Set MODEL_REPO to mount a
# different directory.
MODEL_REPO="${MODEL_REPO:-$(cd "$(dirname "$0")" && pwd)/models}"

docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 --shm-size=1gb \
    -v "${MODEL_REPO}:/models" \
    nvcr.io/nvidia/tritonserver:22.07-py3 tritonserver --model-repository=/models
4 | 


--------------------------------------------------------------------------------